diff --git "a/checkpoint-890/trainer_state.json" "b/checkpoint-890/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-890/trainer_state.json" @@ -0,0 +1,9113 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 45, + "global_step": 890, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0022471910112359553, + "grad_norm": 3.9492883682250977, + "learning_rate": 1.9662921348314604e-07, + "loss": 0.8103, + "step": 1 + }, + { + "epoch": 0.0044943820224719105, + "grad_norm": 4.117438793182373, + "learning_rate": 3.932584269662921e-07, + "loss": 0.8803, + "step": 2 + }, + { + "epoch": 0.006741573033707865, + "grad_norm": 3.809002161026001, + "learning_rate": 5.898876404494381e-07, + "loss": 0.8219, + "step": 3 + }, + { + "epoch": 0.008988764044943821, + "grad_norm": 0.7417504787445068, + "learning_rate": 7.865168539325842e-07, + "loss": 0.0574, + "step": 4 + }, + { + "epoch": 0.011235955056179775, + "grad_norm": 2.707460403442383, + "learning_rate": 9.831460674157302e-07, + "loss": 0.3044, + "step": 5 + }, + { + "epoch": 0.01348314606741573, + "grad_norm": 3.082705497741699, + "learning_rate": 1.1797752808988763e-06, + "loss": 0.3306, + "step": 6 + }, + { + "epoch": 0.015730337078651686, + "grad_norm": 3.102416753768921, + "learning_rate": 1.3764044943820223e-06, + "loss": 0.759, + "step": 7 + }, + { + "epoch": 0.017977528089887642, + "grad_norm": 0.6271047592163086, + "learning_rate": 1.5730337078651683e-06, + "loss": 0.0472, + "step": 8 + }, + { + "epoch": 0.020224719101123594, + "grad_norm": 3.1362593173980713, + "learning_rate": 1.7696629213483144e-06, + "loss": 0.7782, + "step": 9 + }, + { + "epoch": 0.02247191011235955, + "grad_norm": 1.124997615814209, + "learning_rate": 1.9662921348314604e-06, + "loss": 0.0757, + "step": 10 + }, + { + "epoch": 0.024719101123595506, + "grad_norm": 3.194413185119629, + "learning_rate": 2.1629213483146067e-06, + "loss": 0.7778, + "step": 11 + }, + { + "epoch": 0.02696629213483146, + "grad_norm": 3.966202974319458, + "learning_rate": 2.3595505617977525e-06, + "loss": 0.7111, + "step": 12 + }, + { + "epoch": 0.029213483146067417, + "grad_norm": 3.63393235206604, + "learning_rate": 2.5561797752808988e-06, + "loss": 0.6598, + "step": 13 + }, + { + "epoch": 0.03146067415730337, + "grad_norm": 4.087065696716309, + "learning_rate": 2.7528089887640446e-06, + "loss": 0.8901, + "step": 14 + }, + { + "epoch": 0.033707865168539325, + "grad_norm": 2.769573211669922, + "learning_rate": 2.949438202247191e-06, + "loss": 0.3206, + "step": 15 + }, + { + "epoch": 0.035955056179775284, + "grad_norm": 2.630620002746582, + "learning_rate": 3.1460674157303367e-06, + "loss": 0.3408, + "step": 16 + }, + { + "epoch": 0.038202247191011236, + "grad_norm": 2.9570937156677246, + "learning_rate": 3.342696629213483e-06, + "loss": 0.5623, + "step": 17 + }, + { + "epoch": 0.04044943820224719, + "grad_norm": 1.0999970436096191, + "learning_rate": 3.5393258426966288e-06, + "loss": 0.0758, + "step": 18 + }, + { + "epoch": 0.04269662921348315, + "grad_norm": 5.516472816467285, + "learning_rate": 3.735955056179775e-06, + "loss": 0.994, + "step": 19 + }, + { + "epoch": 0.0449438202247191, + "grad_norm": 6.245299816131592, + "learning_rate": 3.932584269662921e-06, + "loss": 2.4196, + "step": 20 + }, + { + "epoch": 0.04719101123595506, + "grad_norm": 0.546605110168457, + "learning_rate": 4.129213483146067e-06, + "loss": 0.0561, + "step": 21 + }, + { + "epoch": 0.04943820224719101, + "grad_norm": 0.7049635648727417, + "learning_rate": 4.325842696629213e-06, + "loss": 0.0827, + "step": 22 + }, + { + "epoch": 0.051685393258426963, + "grad_norm": 3.1022439002990723, + "learning_rate": 4.522471910112359e-06, + "loss": 0.7405, + "step": 23 + }, + { + "epoch": 0.05393258426966292, + "grad_norm": 4.534759044647217, + "learning_rate": 4.719101123595505e-06, + "loss": 0.9656, + "step": 24 + }, + { + "epoch": 0.056179775280898875, + "grad_norm": 3.0486032962799072, + "learning_rate": 4.915730337078652e-06, + "loss": 0.7855, + "step": 25 + }, + { + "epoch": 0.058426966292134834, + "grad_norm": 3.7457478046417236, + "learning_rate": 5.1123595505617975e-06, + "loss": 0.6349, + "step": 26 + }, + { + "epoch": 0.060674157303370786, + "grad_norm": 3.2051479816436768, + "learning_rate": 5.308988764044943e-06, + "loss": 0.8087, + "step": 27 + }, + { + "epoch": 0.06292134831460675, + "grad_norm": 4.389094829559326, + "learning_rate": 5.505617977528089e-06, + "loss": 0.9282, + "step": 28 + }, + { + "epoch": 0.0651685393258427, + "grad_norm": 2.920410394668579, + "learning_rate": 5.702247191011236e-06, + "loss": 0.3377, + "step": 29 + }, + { + "epoch": 0.06741573033707865, + "grad_norm": 2.7193148136138916, + "learning_rate": 5.898876404494382e-06, + "loss": 0.3289, + "step": 30 + }, + { + "epoch": 0.0696629213483146, + "grad_norm": 4.0008225440979, + "learning_rate": 6.0955056179775275e-06, + "loss": 0.6314, + "step": 31 + }, + { + "epoch": 0.07191011235955057, + "grad_norm": 0.5842159390449524, + "learning_rate": 6.292134831460673e-06, + "loss": 0.0611, + "step": 32 + }, + { + "epoch": 0.07415730337078652, + "grad_norm": 3.1256043910980225, + "learning_rate": 6.48876404494382e-06, + "loss": 0.8942, + "step": 33 + }, + { + "epoch": 0.07640449438202247, + "grad_norm": 0.9526051878929138, + "learning_rate": 6.685393258426966e-06, + "loss": 0.0701, + "step": 34 + }, + { + "epoch": 0.07865168539325842, + "grad_norm": 4.061926364898682, + "learning_rate": 6.882022471910112e-06, + "loss": 0.8506, + "step": 35 + }, + { + "epoch": 0.08089887640449438, + "grad_norm": 2.8898491859436035, + "learning_rate": 7.0786516853932575e-06, + "loss": 0.3386, + "step": 36 + }, + { + "epoch": 0.08314606741573034, + "grad_norm": 0.9806709289550781, + "learning_rate": 7.275280898876404e-06, + "loss": 0.0701, + "step": 37 + }, + { + "epoch": 0.0853932584269663, + "grad_norm": 3.8004391193389893, + "learning_rate": 7.47191011235955e-06, + "loss": 0.8042, + "step": 38 + }, + { + "epoch": 0.08764044943820225, + "grad_norm": 4.089083194732666, + "learning_rate": 7.668539325842697e-06, + "loss": 0.8744, + "step": 39 + }, + { + "epoch": 0.0898876404494382, + "grad_norm": 3.419440984725952, + "learning_rate": 7.865168539325842e-06, + "loss": 0.8644, + "step": 40 + }, + { + "epoch": 0.09213483146067415, + "grad_norm": 4.094921588897705, + "learning_rate": 8.061797752808988e-06, + "loss": 0.8647, + "step": 41 + }, + { + "epoch": 0.09438202247191012, + "grad_norm": 3.9199764728546143, + "learning_rate": 8.258426966292133e-06, + "loss": 0.7916, + "step": 42 + }, + { + "epoch": 0.09662921348314607, + "grad_norm": 4.082360744476318, + "learning_rate": 8.45505617977528e-06, + "loss": 0.8599, + "step": 43 + }, + { + "epoch": 0.09887640449438202, + "grad_norm": 0.6443855166435242, + "learning_rate": 8.651685393258427e-06, + "loss": 0.0523, + "step": 44 + }, + { + "epoch": 0.10112359550561797, + "grad_norm": 4.051048278808594, + "learning_rate": 8.848314606741572e-06, + "loss": 0.6968, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.8300318121910095, + "eval_VitaminC_cosine_ap": 0.5514483751609435, + "eval_VitaminC_cosine_f1": 0.6657718120805369, + "eval_VitaminC_cosine_f1_threshold": 0.37456807494163513, + "eval_VitaminC_cosine_precision": 0.5020242914979757, + "eval_VitaminC_cosine_recall": 0.9880478087649402, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 311.380615234375, + "eval_VitaminC_dot_ap": 0.5333497363350208, + "eval_VitaminC_dot_f1": 0.6684709066305818, + "eval_VitaminC_dot_f1_threshold": 144.8927001953125, + "eval_VitaminC_dot_precision": 0.5061475409836066, + "eval_VitaminC_dot_recall": 0.9840637450199203, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 11.028482437133789, + "eval_VitaminC_euclidean_ap": 0.5544340410314673, + "eval_VitaminC_euclidean_f1": 0.6649006622516557, + "eval_VitaminC_euclidean_f1_threshold": 23.38451385498047, + "eval_VitaminC_euclidean_precision": 0.498015873015873, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 232.38790893554688, + "eval_VitaminC_manhattan_ap": 0.5515569514532939, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 498.126220703125, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 311.380615234375, + "eval_VitaminC_max_ap": 0.5544340410314673, + "eval_VitaminC_max_f1": 0.6684709066305818, + "eval_VitaminC_max_f1_threshold": 498.126220703125, + "eval_VitaminC_max_precision": 0.5061475409836066, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5544340410314673, + "eval_sts-test_pearson_cosine": 0.8803067271464453, + "eval_sts-test_pearson_dot": 0.8698285291814508, + "eval_sts-test_pearson_euclidean": 0.9023937835918766, + "eval_sts-test_pearson_manhattan": 0.9020751259156048, + "eval_sts-test_pearson_max": 0.9023937835918766, + "eval_sts-test_spearman_cosine": 0.9038005474254912, + "eval_sts-test_spearman_dot": 0.8707897794601254, + "eval_sts-test_spearman_euclidean": 0.8989733631129851, + "eval_sts-test_spearman_manhattan": 0.8980189529612906, + "eval_sts-test_spearman_max": 0.9038005474254912, + "eval_vitaminc-pairs_loss": 1.7273772954940796, + "eval_vitaminc-pairs_runtime": 1.8924, + "eval_vitaminc-pairs_samples_per_second": 57.071, + "eval_vitaminc-pairs_steps_per_second": 1.057, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_negation-triplets_loss": 0.9174526929855347, + "eval_negation-triplets_runtime": 0.2972, + "eval_negation-triplets_samples_per_second": 215.314, + "eval_negation-triplets_steps_per_second": 3.364, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_scitail-pairs-pos_loss": 0.07368183881044388, + "eval_scitail-pairs-pos_runtime": 0.379, + "eval_scitail-pairs-pos_samples_per_second": 142.492, + "eval_scitail-pairs-pos_steps_per_second": 2.639, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_scitail-pairs-qa_loss": 0.001584450714290142, + "eval_scitail-pairs-qa_runtime": 0.5178, + "eval_scitail-pairs-qa_samples_per_second": 247.198, + "eval_scitail-pairs-qa_steps_per_second": 3.862, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_xsum-pairs_loss": 0.038235221058130264, + "eval_xsum-pairs_runtime": 2.7268, + "eval_xsum-pairs_samples_per_second": 46.941, + "eval_xsum-pairs_steps_per_second": 0.733, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_sciq_pairs_loss": 0.01538097020238638, + "eval_sciq_pairs_runtime": 2.7808, + "eval_sciq_pairs_samples_per_second": 46.029, + "eval_sciq_pairs_steps_per_second": 0.719, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_qasc_pairs_loss": 0.09078988432884216, + "eval_qasc_pairs_runtime": 0.6473, + "eval_qasc_pairs_samples_per_second": 197.758, + "eval_qasc_pairs_steps_per_second": 3.09, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_openbookqa_pairs_loss": 0.6754768490791321, + "eval_openbookqa_pairs_runtime": 0.573, + "eval_openbookqa_pairs_samples_per_second": 223.397, + "eval_openbookqa_pairs_steps_per_second": 3.491, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_msmarco_pairs_loss": 0.15991328656673431, + "eval_msmarco_pairs_runtime": 1.487, + "eval_msmarco_pairs_samples_per_second": 86.078, + "eval_msmarco_pairs_steps_per_second": 1.345, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_nq_pairs_loss": 0.09591890126466751, + "eval_nq_pairs_runtime": 2.3943, + "eval_nq_pairs_samples_per_second": 53.459, + "eval_nq_pairs_steps_per_second": 0.835, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_trivia_pairs_loss": 0.5305934548377991, + "eval_trivia_pairs_runtime": 3.5752, + "eval_trivia_pairs_samples_per_second": 35.802, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_gooaq_pairs_loss": 0.29681000113487244, + "eval_gooaq_pairs_runtime": 0.9087, + "eval_gooaq_pairs_samples_per_second": 140.861, + "eval_gooaq_pairs_steps_per_second": 2.201, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_paws-pos_loss": 0.024501051753759384, + "eval_paws-pos_runtime": 0.6773, + "eval_paws-pos_samples_per_second": 188.996, + "eval_paws-pos_steps_per_second": 2.953, + "step": 45 + }, + { + "epoch": 0.10337078651685393, + "grad_norm": 2.9021923542022705, + "learning_rate": 9.044943820224718e-06, + "loss": 0.3376, + "step": 46 + }, + { + "epoch": 0.10561797752808989, + "grad_norm": 3.179288625717163, + "learning_rate": 9.241573033707863e-06, + "loss": 0.5174, + "step": 47 + }, + { + "epoch": 0.10786516853932585, + "grad_norm": 3.1919493675231934, + "learning_rate": 9.43820224719101e-06, + "loss": 0.8162, + "step": 48 + }, + { + "epoch": 0.1101123595505618, + "grad_norm": 2.8602521419525146, + "learning_rate": 9.634831460674157e-06, + "loss": 0.3545, + "step": 49 + }, + { + "epoch": 0.11235955056179775, + "grad_norm": 2.7570478916168213, + "learning_rate": 9.831460674157303e-06, + "loss": 0.315, + "step": 50 + }, + { + "epoch": 0.1146067415730337, + "grad_norm": 0.8641514778137207, + "learning_rate": 1.0028089887640448e-05, + "loss": 0.0627, + "step": 51 + }, + { + "epoch": 0.11685393258426967, + "grad_norm": 3.9437484741210938, + "learning_rate": 1.0224719101123595e-05, + "loss": 0.8851, + "step": 52 + }, + { + "epoch": 0.11910112359550562, + "grad_norm": 4.144773006439209, + "learning_rate": 1.042134831460674e-05, + "loss": 0.8382, + "step": 53 + }, + { + "epoch": 0.12134831460674157, + "grad_norm": 4.277736186981201, + "learning_rate": 1.0617977528089887e-05, + "loss": 0.733, + "step": 54 + }, + { + "epoch": 0.12359550561797752, + "grad_norm": 4.025904178619385, + "learning_rate": 1.0814606741573032e-05, + "loss": 0.7173, + "step": 55 + }, + { + "epoch": 0.1258426966292135, + "grad_norm": 3.923046827316284, + "learning_rate": 1.1011235955056178e-05, + "loss": 0.7659, + "step": 56 + }, + { + "epoch": 0.12808988764044943, + "grad_norm": 3.2707138061523438, + "learning_rate": 1.1207865168539325e-05, + "loss": 0.793, + "step": 57 + }, + { + "epoch": 0.1303370786516854, + "grad_norm": 3.1660959720611572, + "learning_rate": 1.1404494382022472e-05, + "loss": 0.5426, + "step": 58 + }, + { + "epoch": 0.13258426966292136, + "grad_norm": 4.5236663818359375, + "learning_rate": 1.1601123595505617e-05, + "loss": 0.7641, + "step": 59 + }, + { + "epoch": 0.1348314606741573, + "grad_norm": 0.5771021246910095, + "learning_rate": 1.1797752808988763e-05, + "loss": 0.0657, + "step": 60 + }, + { + "epoch": 0.13707865168539327, + "grad_norm": 3.8541343212127686, + "learning_rate": 1.1994382022471908e-05, + "loss": 0.7836, + "step": 61 + }, + { + "epoch": 0.1393258426966292, + "grad_norm": 4.284148693084717, + "learning_rate": 1.2191011235955055e-05, + "loss": 0.9306, + "step": 62 + }, + { + "epoch": 0.14157303370786517, + "grad_norm": 4.175032615661621, + "learning_rate": 1.23876404494382e-05, + "loss": 0.8673, + "step": 63 + }, + { + "epoch": 0.14382022471910114, + "grad_norm": 5.025452136993408, + "learning_rate": 1.2584269662921347e-05, + "loss": 0.9296, + "step": 64 + }, + { + "epoch": 0.14606741573033707, + "grad_norm": 3.970745086669922, + "learning_rate": 1.2780898876404493e-05, + "loss": 0.8211, + "step": 65 + }, + { + "epoch": 0.14831460674157304, + "grad_norm": 3.150197744369507, + "learning_rate": 1.297752808988764e-05, + "loss": 0.7685, + "step": 66 + }, + { + "epoch": 0.15056179775280898, + "grad_norm": 4.280994415283203, + "learning_rate": 1.3174157303370785e-05, + "loss": 0.7139, + "step": 67 + }, + { + "epoch": 0.15280898876404495, + "grad_norm": 4.288730621337891, + "learning_rate": 1.3370786516853932e-05, + "loss": 0.8241, + "step": 68 + }, + { + "epoch": 0.1550561797752809, + "grad_norm": 3.7402424812316895, + "learning_rate": 1.3567415730337077e-05, + "loss": 0.6256, + "step": 69 + }, + { + "epoch": 0.15730337078651685, + "grad_norm": 4.478890895843506, + "learning_rate": 1.3764044943820223e-05, + "loss": 0.8842, + "step": 70 + }, + { + "epoch": 0.15955056179775282, + "grad_norm": 3.8147876262664795, + "learning_rate": 1.3960674157303368e-05, + "loss": 0.804, + "step": 71 + }, + { + "epoch": 0.16179775280898875, + "grad_norm": 0.7314035296440125, + "learning_rate": 1.4157303370786515e-05, + "loss": 0.0989, + "step": 72 + }, + { + "epoch": 0.16404494382022472, + "grad_norm": 3.074303150177002, + "learning_rate": 1.4353932584269662e-05, + "loss": 0.332, + "step": 73 + }, + { + "epoch": 0.1662921348314607, + "grad_norm": 3.414987325668335, + "learning_rate": 1.4550561797752808e-05, + "loss": 0.5736, + "step": 74 + }, + { + "epoch": 0.16853932584269662, + "grad_norm": 3.7946674823760986, + "learning_rate": 1.4747191011235953e-05, + "loss": 0.8285, + "step": 75 + }, + { + "epoch": 0.1707865168539326, + "grad_norm": 4.310474395751953, + "learning_rate": 1.49438202247191e-05, + "loss": 0.9561, + "step": 76 + }, + { + "epoch": 0.17303370786516853, + "grad_norm": 0.9791378974914551, + "learning_rate": 1.5140449438202245e-05, + "loss": 0.0633, + "step": 77 + }, + { + "epoch": 0.1752808988764045, + "grad_norm": 0.6351795196533203, + "learning_rate": 1.5337078651685393e-05, + "loss": 0.0848, + "step": 78 + }, + { + "epoch": 0.17752808988764046, + "grad_norm": 3.4832303524017334, + "learning_rate": 1.553370786516854e-05, + "loss": 0.8325, + "step": 79 + }, + { + "epoch": 0.1797752808988764, + "grad_norm": 5.115800380706787, + "learning_rate": 1.5730337078651683e-05, + "loss": 1.0011, + "step": 80 + }, + { + "epoch": 0.18202247191011237, + "grad_norm": 3.552396297454834, + "learning_rate": 1.592696629213483e-05, + "loss": 0.8697, + "step": 81 + }, + { + "epoch": 0.1842696629213483, + "grad_norm": 4.491541862487793, + "learning_rate": 1.6123595505617977e-05, + "loss": 0.8344, + "step": 82 + }, + { + "epoch": 0.18651685393258427, + "grad_norm": 4.73278284072876, + "learning_rate": 1.6320224719101122e-05, + "loss": 0.9967, + "step": 83 + }, + { + "epoch": 0.18876404494382024, + "grad_norm": 2.994192123413086, + "learning_rate": 1.6516853932584267e-05, + "loss": 0.4638, + "step": 84 + }, + { + "epoch": 0.19101123595505617, + "grad_norm": 4.142394542694092, + "learning_rate": 1.6713483146067415e-05, + "loss": 0.8994, + "step": 85 + }, + { + "epoch": 0.19325842696629214, + "grad_norm": 4.149839401245117, + "learning_rate": 1.691011235955056e-05, + "loss": 0.7789, + "step": 86 + }, + { + "epoch": 0.19550561797752808, + "grad_norm": 0.45795938372612, + "learning_rate": 1.7106741573033705e-05, + "loss": 0.0555, + "step": 87 + }, + { + "epoch": 0.19775280898876405, + "grad_norm": 3.4293618202209473, + "learning_rate": 1.7303370786516853e-05, + "loss": 0.3778, + "step": 88 + }, + { + "epoch": 0.2, + "grad_norm": 4.041529655456543, + "learning_rate": 1.75e-05, + "loss": 0.708, + "step": 89 + }, + { + "epoch": 0.20224719101123595, + "grad_norm": 0.6160458922386169, + "learning_rate": 1.7696629213483143e-05, + "loss": 0.0689, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.74173903465271, + "eval_VitaminC_cosine_ap": 0.5513770735348443, + "eval_VitaminC_cosine_f1": 0.6675531914893617, + "eval_VitaminC_cosine_f1_threshold": 0.32480987906455994, + "eval_VitaminC_cosine_precision": 0.500998003992016, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.560546875, + "eval_VitaminC_dot_accuracy_threshold": 297.664794921875, + "eval_VitaminC_dot_ap": 0.5340088824099496, + "eval_VitaminC_dot_f1": 0.6666666666666667, + "eval_VitaminC_dot_f1_threshold": 126.67618560791016, + "eval_VitaminC_dot_precision": 0.501002004008016, + "eval_VitaminC_dot_recall": 0.9960159362549801, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 14.345688819885254, + "eval_VitaminC_euclidean_ap": 0.5542145004976253, + "eval_VitaminC_euclidean_f1": 0.6675531914893617, + "eval_VitaminC_euclidean_f1_threshold": 23.381019592285156, + "eval_VitaminC_euclidean_precision": 0.500998003992016, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 232.7296142578125, + "eval_VitaminC_manhattan_ap": 0.5523953693907266, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 496.4290466308594, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 297.664794921875, + "eval_VitaminC_max_ap": 0.5542145004976253, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 496.4290466308594, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5542145004976253, + "eval_sts-test_pearson_cosine": 0.8800782580988616, + "eval_sts-test_pearson_dot": 0.8687642290872662, + "eval_sts-test_pearson_euclidean": 0.9034088230546415, + "eval_sts-test_pearson_manhattan": 0.9030146212284895, + "eval_sts-test_pearson_max": 0.9034088230546415, + "eval_sts-test_spearman_cosine": 0.904560289590133, + "eval_sts-test_spearman_dot": 0.8705944849554133, + "eval_sts-test_spearman_euclidean": 0.8998959103665689, + "eval_sts-test_spearman_manhattan": 0.8995891404697307, + "eval_sts-test_spearman_max": 0.904560289590133, + "eval_vitaminc-pairs_loss": 1.6141985654830933, + "eval_vitaminc-pairs_runtime": 1.864, + "eval_vitaminc-pairs_samples_per_second": 57.94, + "eval_vitaminc-pairs_steps_per_second": 1.073, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_negation-triplets_loss": 0.9220322370529175, + "eval_negation-triplets_runtime": 0.3199, + "eval_negation-triplets_samples_per_second": 200.043, + "eval_negation-triplets_steps_per_second": 3.126, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_scitail-pairs-pos_loss": 0.0654294565320015, + "eval_scitail-pairs-pos_runtime": 0.4625, + "eval_scitail-pairs-pos_samples_per_second": 116.76, + "eval_scitail-pairs-pos_steps_per_second": 2.162, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_scitail-pairs-qa_loss": 0.0015887805493548512, + "eval_scitail-pairs-qa_runtime": 0.5768, + "eval_scitail-pairs-qa_samples_per_second": 221.899, + "eval_scitail-pairs-qa_steps_per_second": 3.467, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_xsum-pairs_loss": 0.03991687670350075, + "eval_xsum-pairs_runtime": 2.7403, + "eval_xsum-pairs_samples_per_second": 46.71, + "eval_xsum-pairs_steps_per_second": 0.73, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_sciq_pairs_loss": 0.01584962010383606, + "eval_sciq_pairs_runtime": 2.8429, + "eval_sciq_pairs_samples_per_second": 45.024, + "eval_sciq_pairs_steps_per_second": 0.703, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_qasc_pairs_loss": 0.09112343192100525, + "eval_qasc_pairs_runtime": 0.6492, + "eval_qasc_pairs_samples_per_second": 197.154, + "eval_qasc_pairs_steps_per_second": 3.081, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_openbookqa_pairs_loss": 0.7132729887962341, + "eval_openbookqa_pairs_runtime": 0.5847, + "eval_openbookqa_pairs_samples_per_second": 218.922, + "eval_openbookqa_pairs_steps_per_second": 3.421, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_msmarco_pairs_loss": 0.15173853933811188, + "eval_msmarco_pairs_runtime": 1.4966, + "eval_msmarco_pairs_samples_per_second": 85.527, + "eval_msmarco_pairs_steps_per_second": 1.336, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_nq_pairs_loss": 0.09653442353010178, + "eval_nq_pairs_runtime": 2.3749, + "eval_nq_pairs_samples_per_second": 53.897, + "eval_nq_pairs_steps_per_second": 0.842, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_trivia_pairs_loss": 0.5191965699195862, + "eval_trivia_pairs_runtime": 3.6006, + "eval_trivia_pairs_samples_per_second": 35.55, + "eval_trivia_pairs_steps_per_second": 0.555, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_gooaq_pairs_loss": 0.30713126063346863, + "eval_gooaq_pairs_runtime": 0.9131, + "eval_gooaq_pairs_samples_per_second": 140.178, + "eval_gooaq_pairs_steps_per_second": 2.19, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_paws-pos_loss": 0.024471310898661613, + "eval_paws-pos_runtime": 0.6872, + "eval_paws-pos_samples_per_second": 186.254, + "eval_paws-pos_steps_per_second": 2.91, + "step": 90 + }, + { + "epoch": 0.20449438202247192, + "grad_norm": 6.209661483764648, + "learning_rate": 1.7893258426966292e-05, + "loss": 2.3489, + "step": 91 + }, + { + "epoch": 0.20674157303370785, + "grad_norm": 3.1821141242980957, + "learning_rate": 1.8089887640449437e-05, + "loss": 0.741, + "step": 92 + }, + { + "epoch": 0.20898876404494382, + "grad_norm": 3.871994972229004, + "learning_rate": 1.8286516853932585e-05, + "loss": 0.7729, + "step": 93 + }, + { + "epoch": 0.21123595505617979, + "grad_norm": 0.5280765891075134, + "learning_rate": 1.8483146067415727e-05, + "loss": 0.0631, + "step": 94 + }, + { + "epoch": 0.21348314606741572, + "grad_norm": 4.475915431976318, + "learning_rate": 1.8679775280898875e-05, + "loss": 0.9342, + "step": 95 + }, + { + "epoch": 0.2157303370786517, + "grad_norm": 3.949381113052368, + "learning_rate": 1.887640449438202e-05, + "loss": 0.8581, + "step": 96 + }, + { + "epoch": 0.21797752808988763, + "grad_norm": 2.910426616668701, + "learning_rate": 1.907303370786517e-05, + "loss": 0.5198, + "step": 97 + }, + { + "epoch": 0.2202247191011236, + "grad_norm": 4.028941631317139, + "learning_rate": 1.9269662921348313e-05, + "loss": 0.846, + "step": 98 + }, + { + "epoch": 0.22247191011235956, + "grad_norm": 4.183433532714844, + "learning_rate": 1.946629213483146e-05, + "loss": 0.6581, + "step": 99 + }, + { + "epoch": 0.2247191011235955, + "grad_norm": 3.348114252090454, + "learning_rate": 1.9662921348314607e-05, + "loss": 0.3579, + "step": 100 + }, + { + "epoch": 0.22696629213483147, + "grad_norm": 4.055211544036865, + "learning_rate": 1.9859550561797752e-05, + "loss": 0.908, + "step": 101 + }, + { + "epoch": 0.2292134831460674, + "grad_norm": 1.0024710893630981, + "learning_rate": 2.0056179775280897e-05, + "loss": 0.0664, + "step": 102 + }, + { + "epoch": 0.23146067415730337, + "grad_norm": 3.582249641418457, + "learning_rate": 2.0252808988764042e-05, + "loss": 0.5411, + "step": 103 + }, + { + "epoch": 0.23370786516853934, + "grad_norm": 4.226349830627441, + "learning_rate": 2.044943820224719e-05, + "loss": 0.9163, + "step": 104 + }, + { + "epoch": 0.23595505617977527, + "grad_norm": 3.002727508544922, + "learning_rate": 2.0646067415730335e-05, + "loss": 0.7975, + "step": 105 + }, + { + "epoch": 0.23820224719101124, + "grad_norm": 3.5497515201568604, + "learning_rate": 2.084269662921348e-05, + "loss": 0.37, + "step": 106 + }, + { + "epoch": 0.24044943820224718, + "grad_norm": 4.381045341491699, + "learning_rate": 2.103932584269663e-05, + "loss": 0.8495, + "step": 107 + }, + { + "epoch": 0.24269662921348314, + "grad_norm": 3.926840305328369, + "learning_rate": 2.1235955056179773e-05, + "loss": 0.8073, + "step": 108 + }, + { + "epoch": 0.2449438202247191, + "grad_norm": 3.0835390090942383, + "learning_rate": 2.1432584269662922e-05, + "loss": 0.7563, + "step": 109 + }, + { + "epoch": 0.24719101123595505, + "grad_norm": 4.230669975280762, + "learning_rate": 2.1629213483146063e-05, + "loss": 0.6585, + "step": 110 + }, + { + "epoch": 0.24943820224719102, + "grad_norm": 2.8849070072174072, + "learning_rate": 2.1825842696629212e-05, + "loss": 0.3246, + "step": 111 + }, + { + "epoch": 0.251685393258427, + "grad_norm": 4.796951770782471, + "learning_rate": 2.2022471910112357e-05, + "loss": 0.9718, + "step": 112 + }, + { + "epoch": 0.2539325842696629, + "grad_norm": 4.60318660736084, + "learning_rate": 2.2219101123595505e-05, + "loss": 0.8584, + "step": 113 + }, + { + "epoch": 0.25617977528089886, + "grad_norm": 3.098703384399414, + "learning_rate": 2.241573033707865e-05, + "loss": 0.3385, + "step": 114 + }, + { + "epoch": 0.25842696629213485, + "grad_norm": 2.9519224166870117, + "learning_rate": 2.2612359550561795e-05, + "loss": 0.323, + "step": 115 + }, + { + "epoch": 0.2606741573033708, + "grad_norm": 2.913742780685425, + "learning_rate": 2.2808988764044944e-05, + "loss": 0.3359, + "step": 116 + }, + { + "epoch": 0.26292134831460673, + "grad_norm": 4.148440837860107, + "learning_rate": 2.300561797752809e-05, + "loss": 0.6955, + "step": 117 + }, + { + "epoch": 0.2651685393258427, + "grad_norm": 0.8463248610496521, + "learning_rate": 2.3202247191011234e-05, + "loss": 0.0539, + "step": 118 + }, + { + "epoch": 0.26741573033707866, + "grad_norm": 0.7284589409828186, + "learning_rate": 2.339887640449438e-05, + "loss": 0.0507, + "step": 119 + }, + { + "epoch": 0.2696629213483146, + "grad_norm": 3.615086317062378, + "learning_rate": 2.3595505617977527e-05, + "loss": 0.314, + "step": 120 + }, + { + "epoch": 0.27191011235955054, + "grad_norm": 5.229820728302002, + "learning_rate": 2.3792134831460672e-05, + "loss": 1.0339, + "step": 121 + }, + { + "epoch": 0.27415730337078653, + "grad_norm": 3.6847782135009766, + "learning_rate": 2.3988764044943817e-05, + "loss": 0.3158, + "step": 122 + }, + { + "epoch": 0.27640449438202247, + "grad_norm": 4.280517578125, + "learning_rate": 2.4185393258426965e-05, + "loss": 0.7809, + "step": 123 + }, + { + "epoch": 0.2786516853932584, + "grad_norm": 4.476150035858154, + "learning_rate": 2.438202247191011e-05, + "loss": 0.9516, + "step": 124 + }, + { + "epoch": 0.2808988764044944, + "grad_norm": 2.7380239963531494, + "learning_rate": 2.457865168539326e-05, + "loss": 0.3117, + "step": 125 + }, + { + "epoch": 0.28314606741573034, + "grad_norm": 3.9667162895202637, + "learning_rate": 2.47752808988764e-05, + "loss": 0.8366, + "step": 126 + }, + { + "epoch": 0.2853932584269663, + "grad_norm": 4.552999019622803, + "learning_rate": 2.497191011235955e-05, + "loss": 0.8033, + "step": 127 + }, + { + "epoch": 0.2876404494382023, + "grad_norm": 3.4238576889038086, + "learning_rate": 2.5168539325842694e-05, + "loss": 0.7253, + "step": 128 + }, + { + "epoch": 0.2898876404494382, + "grad_norm": 4.677807331085205, + "learning_rate": 2.5365168539325842e-05, + "loss": 0.8345, + "step": 129 + }, + { + "epoch": 0.29213483146067415, + "grad_norm": 4.282113075256348, + "learning_rate": 2.5561797752808987e-05, + "loss": 0.7532, + "step": 130 + }, + { + "epoch": 0.2943820224719101, + "grad_norm": 4.375221252441406, + "learning_rate": 2.5758426966292132e-05, + "loss": 0.8247, + "step": 131 + }, + { + "epoch": 0.2966292134831461, + "grad_norm": 3.2591633796691895, + "learning_rate": 2.595505617977528e-05, + "loss": 0.5175, + "step": 132 + }, + { + "epoch": 0.298876404494382, + "grad_norm": 4.146636962890625, + "learning_rate": 2.6151685393258425e-05, + "loss": 0.7813, + "step": 133 + }, + { + "epoch": 0.30112359550561796, + "grad_norm": 4.2413249015808105, + "learning_rate": 2.634831460674157e-05, + "loss": 0.6582, + "step": 134 + }, + { + "epoch": 0.30337078651685395, + "grad_norm": 4.541455268859863, + "learning_rate": 2.6544943820224715e-05, + "loss": 0.3484, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_VitaminC_cosine_accuracy": 0.560546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.7956135272979736, + "eval_VitaminC_cosine_ap": 0.5505565383154402, + "eval_VitaminC_cosine_f1": 0.6684709066305818, + "eval_VitaminC_cosine_f1_threshold": 0.40466147661209106, + "eval_VitaminC_cosine_precision": 0.5061475409836066, + "eval_VitaminC_cosine_recall": 0.9840637450199203, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 312.2774658203125, + "eval_VitaminC_dot_ap": 0.5365135091766033, + "eval_VitaminC_dot_f1": 0.6684856753069577, + "eval_VitaminC_dot_f1_threshold": 157.33203125, + "eval_VitaminC_dot_precision": 0.508298755186722, + "eval_VitaminC_dot_recall": 0.9760956175298805, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 12.184114456176758, + "eval_VitaminC_euclidean_ap": 0.5517706579195627, + "eval_VitaminC_euclidean_f1": 0.6649006622516557, + "eval_VitaminC_euclidean_f1_threshold": 23.68879508972168, + "eval_VitaminC_euclidean_precision": 0.498015873015873, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 241.22061157226562, + "eval_VitaminC_manhattan_ap": 0.5494156168773414, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 510.2530212402344, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 312.2774658203125, + "eval_VitaminC_max_ap": 0.5517706579195627, + "eval_VitaminC_max_f1": 0.6684856753069577, + "eval_VitaminC_max_f1_threshold": 510.2530212402344, + "eval_VitaminC_max_precision": 0.508298755186722, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5517706579195627, + "eval_sts-test_pearson_cosine": 0.8812438499723412, + "eval_sts-test_pearson_dot": 0.8695651753004092, + "eval_sts-test_pearson_euclidean": 0.9036940037118162, + "eval_sts-test_pearson_manhattan": 0.9035516699922166, + "eval_sts-test_pearson_max": 0.9036940037118162, + "eval_sts-test_spearman_cosine": 0.9049742835092648, + "eval_sts-test_spearman_dot": 0.8707925987895928, + "eval_sts-test_spearman_euclidean": 0.9003956924537878, + "eval_sts-test_spearman_manhattan": 0.9002747745455083, + "eval_sts-test_spearman_max": 0.9049742835092648, + "eval_vitaminc-pairs_loss": 1.5520410537719727, + "eval_vitaminc-pairs_runtime": 1.8323, + "eval_vitaminc-pairs_samples_per_second": 58.943, + "eval_vitaminc-pairs_steps_per_second": 1.092, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_negation-triplets_loss": 0.9211694002151489, + "eval_negation-triplets_runtime": 0.2923, + "eval_negation-triplets_samples_per_second": 218.93, + "eval_negation-triplets_steps_per_second": 3.421, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_scitail-pairs-pos_loss": 0.07377135753631592, + "eval_scitail-pairs-pos_runtime": 0.3681, + "eval_scitail-pairs-pos_samples_per_second": 146.691, + "eval_scitail-pairs-pos_steps_per_second": 2.716, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_scitail-pairs-qa_loss": 0.00150959100574255, + "eval_scitail-pairs-qa_runtime": 0.5123, + "eval_scitail-pairs-qa_samples_per_second": 249.842, + "eval_scitail-pairs-qa_steps_per_second": 3.904, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_xsum-pairs_loss": 0.036599572747945786, + "eval_xsum-pairs_runtime": 2.7238, + "eval_xsum-pairs_samples_per_second": 46.994, + "eval_xsum-pairs_steps_per_second": 0.734, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_sciq_pairs_loss": 0.01615014858543873, + "eval_sciq_pairs_runtime": 2.8064, + "eval_sciq_pairs_samples_per_second": 45.61, + "eval_sciq_pairs_steps_per_second": 0.713, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_qasc_pairs_loss": 0.09235507994890213, + "eval_qasc_pairs_runtime": 0.6488, + "eval_qasc_pairs_samples_per_second": 197.276, + "eval_qasc_pairs_steps_per_second": 3.082, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_openbookqa_pairs_loss": 0.6891775727272034, + "eval_openbookqa_pairs_runtime": 0.5698, + "eval_openbookqa_pairs_samples_per_second": 224.641, + "eval_openbookqa_pairs_steps_per_second": 3.51, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_msmarco_pairs_loss": 0.16766037046909332, + "eval_msmarco_pairs_runtime": 1.4798, + "eval_msmarco_pairs_samples_per_second": 86.499, + "eval_msmarco_pairs_steps_per_second": 1.352, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_nq_pairs_loss": 0.09737721085548401, + "eval_nq_pairs_runtime": 2.3409, + "eval_nq_pairs_samples_per_second": 54.68, + "eval_nq_pairs_steps_per_second": 0.854, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_trivia_pairs_loss": 0.5458433032035828, + "eval_trivia_pairs_runtime": 3.5771, + "eval_trivia_pairs_samples_per_second": 35.783, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_gooaq_pairs_loss": 0.3082329332828522, + "eval_gooaq_pairs_runtime": 0.9181, + "eval_gooaq_pairs_samples_per_second": 139.413, + "eval_gooaq_pairs_steps_per_second": 2.178, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_paws-pos_loss": 0.02423396334052086, + "eval_paws-pos_runtime": 0.6827, + "eval_paws-pos_samples_per_second": 187.501, + "eval_paws-pos_steps_per_second": 2.93, + "step": 135 + }, + { + "epoch": 0.3056179775280899, + "grad_norm": 4.549901485443115, + "learning_rate": 2.6741573033707864e-05, + "loss": 0.7648, + "step": 136 + }, + { + "epoch": 0.30786516853932583, + "grad_norm": 3.225851535797119, + "learning_rate": 2.693820224719101e-05, + "loss": 0.7554, + "step": 137 + }, + { + "epoch": 0.3101123595505618, + "grad_norm": 0.6228423118591309, + "learning_rate": 2.7134831460674154e-05, + "loss": 0.0753, + "step": 138 + }, + { + "epoch": 0.31235955056179776, + "grad_norm": 3.12802791595459, + "learning_rate": 2.7331460674157302e-05, + "loss": 0.4987, + "step": 139 + }, + { + "epoch": 0.3146067415730337, + "grad_norm": 4.1997880935668945, + "learning_rate": 2.7528089887640447e-05, + "loss": 0.8543, + "step": 140 + }, + { + "epoch": 0.31685393258426964, + "grad_norm": 4.3362860679626465, + "learning_rate": 2.7724719101123595e-05, + "loss": 0.9425, + "step": 141 + }, + { + "epoch": 0.31910112359550563, + "grad_norm": 0.5599316954612732, + "learning_rate": 2.7921348314606737e-05, + "loss": 0.0472, + "step": 142 + }, + { + "epoch": 0.32134831460674157, + "grad_norm": 3.503603458404541, + "learning_rate": 2.8117977528089885e-05, + "loss": 0.848, + "step": 143 + }, + { + "epoch": 0.3235955056179775, + "grad_norm": 4.712310314178467, + "learning_rate": 2.831460674157303e-05, + "loss": 0.8946, + "step": 144 + }, + { + "epoch": 0.3258426966292135, + "grad_norm": 3.1823527812957764, + "learning_rate": 2.851123595505618e-05, + "loss": 0.7841, + "step": 145 + }, + { + "epoch": 0.32808988764044944, + "grad_norm": 4.423196315765381, + "learning_rate": 2.8707865168539324e-05, + "loss": 0.6653, + "step": 146 + }, + { + "epoch": 0.3303370786516854, + "grad_norm": 4.137822151184082, + "learning_rate": 2.890449438202247e-05, + "loss": 0.3522, + "step": 147 + }, + { + "epoch": 0.3325842696629214, + "grad_norm": 2.997777223587036, + "learning_rate": 2.9101123595505617e-05, + "loss": 0.4853, + "step": 148 + }, + { + "epoch": 0.3348314606741573, + "grad_norm": 2.89650559425354, + "learning_rate": 2.9297752808988762e-05, + "loss": 0.4726, + "step": 149 + }, + { + "epoch": 0.33707865168539325, + "grad_norm": 5.486624717712402, + "learning_rate": 2.9494382022471907e-05, + "loss": 0.8693, + "step": 150 + }, + { + "epoch": 0.3393258426966292, + "grad_norm": 4.800889015197754, + "learning_rate": 2.9691011235955052e-05, + "loss": 0.8124, + "step": 151 + }, + { + "epoch": 0.3415730337078652, + "grad_norm": 4.188066005706787, + "learning_rate": 2.98876404494382e-05, + "loss": 0.8206, + "step": 152 + }, + { + "epoch": 0.3438202247191011, + "grad_norm": 4.340461254119873, + "learning_rate": 3.0084269662921345e-05, + "loss": 0.9406, + "step": 153 + }, + { + "epoch": 0.34606741573033706, + "grad_norm": 4.658304214477539, + "learning_rate": 3.028089887640449e-05, + "loss": 0.7944, + "step": 154 + }, + { + "epoch": 0.34831460674157305, + "grad_norm": 0.6266987919807434, + "learning_rate": 3.047752808988764e-05, + "loss": 0.0766, + "step": 155 + }, + { + "epoch": 0.350561797752809, + "grad_norm": 4.252346515655518, + "learning_rate": 3.067415730337079e-05, + "loss": 0.8609, + "step": 156 + }, + { + "epoch": 0.35280898876404493, + "grad_norm": 4.9649658203125, + "learning_rate": 3.087078651685393e-05, + "loss": 1.0533, + "step": 157 + }, + { + "epoch": 0.3550561797752809, + "grad_norm": 4.485607624053955, + "learning_rate": 3.106741573033708e-05, + "loss": 0.8396, + "step": 158 + }, + { + "epoch": 0.35730337078651686, + "grad_norm": 3.241231918334961, + "learning_rate": 3.126404494382022e-05, + "loss": 0.7865, + "step": 159 + }, + { + "epoch": 0.3595505617977528, + "grad_norm": 6.846582889556885, + "learning_rate": 3.146067415730337e-05, + "loss": 2.4616, + "step": 160 + }, + { + "epoch": 0.36179775280898874, + "grad_norm": 0.5514687895774841, + "learning_rate": 3.165730337078651e-05, + "loss": 0.0556, + "step": 161 + }, + { + "epoch": 0.36404494382022473, + "grad_norm": 3.7877562046051025, + "learning_rate": 3.185393258426966e-05, + "loss": 0.3758, + "step": 162 + }, + { + "epoch": 0.36629213483146067, + "grad_norm": 5.397939682006836, + "learning_rate": 3.205056179775281e-05, + "loss": 0.9312, + "step": 163 + }, + { + "epoch": 0.3685393258426966, + "grad_norm": 4.301459312438965, + "learning_rate": 3.2247191011235954e-05, + "loss": 0.7993, + "step": 164 + }, + { + "epoch": 0.3707865168539326, + "grad_norm": 4.49428129196167, + "learning_rate": 3.24438202247191e-05, + "loss": 0.8104, + "step": 165 + }, + { + "epoch": 0.37303370786516854, + "grad_norm": 3.2210912704467773, + "learning_rate": 3.2640449438202244e-05, + "loss": 0.8199, + "step": 166 + }, + { + "epoch": 0.3752808988764045, + "grad_norm": 5.359859466552734, + "learning_rate": 3.283707865168539e-05, + "loss": 1.0724, + "step": 167 + }, + { + "epoch": 0.3775280898876405, + "grad_norm": 4.00059700012207, + "learning_rate": 3.3033707865168534e-05, + "loss": 0.3521, + "step": 168 + }, + { + "epoch": 0.3797752808988764, + "grad_norm": 4.418768882751465, + "learning_rate": 3.3230337078651685e-05, + "loss": 0.8536, + "step": 169 + }, + { + "epoch": 0.38202247191011235, + "grad_norm": 4.15454626083374, + "learning_rate": 3.342696629213483e-05, + "loss": 0.872, + "step": 170 + }, + { + "epoch": 0.3842696629213483, + "grad_norm": 3.8060054779052734, + "learning_rate": 3.3623595505617975e-05, + "loss": 0.8009, + "step": 171 + }, + { + "epoch": 0.3865168539325843, + "grad_norm": 3.584745407104492, + "learning_rate": 3.382022471910112e-05, + "loss": 0.7798, + "step": 172 + }, + { + "epoch": 0.3887640449438202, + "grad_norm": 4.861410140991211, + "learning_rate": 3.4016853932584265e-05, + "loss": 0.5953, + "step": 173 + }, + { + "epoch": 0.39101123595505616, + "grad_norm": 3.983793020248413, + "learning_rate": 3.421348314606741e-05, + "loss": 0.7562, + "step": 174 + }, + { + "epoch": 0.39325842696629215, + "grad_norm": 4.841738224029541, + "learning_rate": 3.4410112359550555e-05, + "loss": 0.7227, + "step": 175 + }, + { + "epoch": 0.3955056179775281, + "grad_norm": 4.787370204925537, + "learning_rate": 3.460674157303371e-05, + "loss": 0.8953, + "step": 176 + }, + { + "epoch": 0.39775280898876403, + "grad_norm": 4.337812900543213, + "learning_rate": 3.480337078651685e-05, + "loss": 0.7102, + "step": 177 + }, + { + "epoch": 0.4, + "grad_norm": 0.9599294662475586, + "learning_rate": 3.5e-05, + "loss": 0.0667, + "step": 178 + }, + { + "epoch": 0.40224719101123596, + "grad_norm": 0.6864398717880249, + "learning_rate": 3.4999863718440846e-05, + "loss": 0.0528, + "step": 179 + }, + { + "epoch": 0.4044943820224719, + "grad_norm": 4.738316059112549, + "learning_rate": 3.499945487641664e-05, + "loss": 0.7312, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8256886005401611, + "eval_VitaminC_cosine_ap": 0.5557251062538118, + "eval_VitaminC_cosine_f1": 0.6666666666666667, + "eval_VitaminC_cosine_f1_threshold": 0.4391498863697052, + "eval_VitaminC_cosine_precision": 0.5051334702258727, + "eval_VitaminC_cosine_recall": 0.9800796812749004, + "eval_VitaminC_dot_accuracy": 0.556640625, + "eval_VitaminC_dot_accuracy_threshold": 314.2790832519531, + "eval_VitaminC_dot_ap": 0.5397120960874565, + "eval_VitaminC_dot_f1": 0.6684636118598383, + "eval_VitaminC_dot_f1_threshold": 144.02464294433594, + "eval_VitaminC_dot_precision": 0.505091649694501, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.560546875, + "eval_VitaminC_euclidean_accuracy_threshold": 13.859346389770508, + "eval_VitaminC_euclidean_ap": 0.5582755831276058, + "eval_VitaminC_euclidean_f1": 0.667605633802817, + "eval_VitaminC_euclidean_f1_threshold": 18.874879837036133, + "eval_VitaminC_euclidean_precision": 0.5163398692810458, + "eval_VitaminC_euclidean_recall": 0.9442231075697212, + "eval_VitaminC_manhattan_accuracy": 0.560546875, + "eval_VitaminC_manhattan_accuracy_threshold": 239.6153564453125, + "eval_VitaminC_manhattan_ap": 0.5569115785564898, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 501.158447265625, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 314.2790832519531, + "eval_VitaminC_max_ap": 0.5582755831276058, + "eval_VitaminC_max_f1": 0.6684636118598383, + "eval_VitaminC_max_f1_threshold": 501.158447265625, + "eval_VitaminC_max_precision": 0.5163398692810458, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5582755831276058, + "eval_sts-test_pearson_cosine": 0.8825432226222443, + "eval_sts-test_pearson_dot": 0.8720125241659442, + "eval_sts-test_pearson_euclidean": 0.9053801707227738, + "eval_sts-test_pearson_manhattan": 0.9060044572091359, + "eval_sts-test_pearson_max": 0.9060044572091359, + "eval_sts-test_spearman_cosine": 0.9055030196626042, + "eval_sts-test_spearman_dot": 0.8729395405548455, + "eval_sts-test_spearman_euclidean": 0.9013990604854444, + "eval_sts-test_spearman_manhattan": 0.9021052353902007, + "eval_sts-test_spearman_max": 0.9055030196626042, + "eval_vitaminc-pairs_loss": 1.5215541124343872, + "eval_vitaminc-pairs_runtime": 1.8745, + "eval_vitaminc-pairs_samples_per_second": 57.614, + "eval_vitaminc-pairs_steps_per_second": 1.067, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_negation-triplets_loss": 0.9813100099563599, + "eval_negation-triplets_runtime": 0.3009, + "eval_negation-triplets_samples_per_second": 212.73, + "eval_negation-triplets_steps_per_second": 3.324, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_scitail-pairs-pos_loss": 0.09161412715911865, + "eval_scitail-pairs-pos_runtime": 0.3936, + "eval_scitail-pairs-pos_samples_per_second": 137.188, + "eval_scitail-pairs-pos_steps_per_second": 2.541, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_scitail-pairs-qa_loss": 0.0013133077882230282, + "eval_scitail-pairs-qa_runtime": 0.5286, + "eval_scitail-pairs-qa_samples_per_second": 242.147, + "eval_scitail-pairs-qa_steps_per_second": 3.784, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_xsum-pairs_loss": 0.049595557153224945, + "eval_xsum-pairs_runtime": 2.7447, + "eval_xsum-pairs_samples_per_second": 46.636, + "eval_xsum-pairs_steps_per_second": 0.729, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_sciq_pairs_loss": 0.017273178324103355, + "eval_sciq_pairs_runtime": 2.8401, + "eval_sciq_pairs_samples_per_second": 45.069, + "eval_sciq_pairs_steps_per_second": 0.704, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_qasc_pairs_loss": 0.09485691040754318, + "eval_qasc_pairs_runtime": 0.6594, + "eval_qasc_pairs_samples_per_second": 194.113, + "eval_qasc_pairs_steps_per_second": 3.033, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_openbookqa_pairs_loss": 0.7253161072731018, + "eval_openbookqa_pairs_runtime": 0.5801, + "eval_openbookqa_pairs_samples_per_second": 220.633, + "eval_openbookqa_pairs_steps_per_second": 3.447, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_msmarco_pairs_loss": 0.17383378744125366, + "eval_msmarco_pairs_runtime": 1.4824, + "eval_msmarco_pairs_samples_per_second": 86.346, + "eval_msmarco_pairs_steps_per_second": 1.349, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_nq_pairs_loss": 0.10324681550264359, + "eval_nq_pairs_runtime": 2.3542, + "eval_nq_pairs_samples_per_second": 54.372, + "eval_nq_pairs_steps_per_second": 0.85, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_trivia_pairs_loss": 0.5358972549438477, + "eval_trivia_pairs_runtime": 3.5881, + "eval_trivia_pairs_samples_per_second": 35.673, + "eval_trivia_pairs_steps_per_second": 0.557, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_gooaq_pairs_loss": 0.3070329427719116, + "eval_gooaq_pairs_runtime": 0.9009, + "eval_gooaq_pairs_samples_per_second": 142.079, + "eval_gooaq_pairs_steps_per_second": 2.22, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_paws-pos_loss": 0.024055125191807747, + "eval_paws-pos_runtime": 0.6792, + "eval_paws-pos_samples_per_second": 188.469, + "eval_paws-pos_steps_per_second": 2.945, + "step": 180 + }, + { + "epoch": 0.4067415730337079, + "grad_norm": 5.063413143157959, + "learning_rate": 3.4998773481887046e-05, + "loss": 0.7809, + "step": 181 + }, + { + "epoch": 0.40898876404494383, + "grad_norm": 4.108719825744629, + "learning_rate": 3.499781954811798e-05, + "loss": 0.8333, + "step": 182 + }, + { + "epoch": 0.41123595505617977, + "grad_norm": 4.6362104415893555, + "learning_rate": 3.499659309368139e-05, + "loss": 0.9283, + "step": 183 + }, + { + "epoch": 0.4134831460674157, + "grad_norm": 4.432968616485596, + "learning_rate": 3.499509414245486e-05, + "loss": 0.7011, + "step": 184 + }, + { + "epoch": 0.4157303370786517, + "grad_norm": 4.040768623352051, + "learning_rate": 3.4993322723621164e-05, + "loss": 0.8413, + "step": 185 + }, + { + "epoch": 0.41797752808988764, + "grad_norm": 5.797406196594238, + "learning_rate": 3.499127887166769e-05, + "loss": 1.1679, + "step": 186 + }, + { + "epoch": 0.4202247191011236, + "grad_norm": 4.275143623352051, + "learning_rate": 3.498896262638578e-05, + "loss": 0.8701, + "step": 187 + }, + { + "epoch": 0.42247191011235957, + "grad_norm": 3.920672655105591, + "learning_rate": 3.498637403286993e-05, + "loss": 0.8139, + "step": 188 + }, + { + "epoch": 0.4247191011235955, + "grad_norm": 4.049210071563721, + "learning_rate": 3.498351314151693e-05, + "loss": 0.664, + "step": 189 + }, + { + "epoch": 0.42696629213483145, + "grad_norm": 4.007586479187012, + "learning_rate": 3.498038000802489e-05, + "loss": 0.3835, + "step": 190 + }, + { + "epoch": 0.42921348314606744, + "grad_norm": 3.7303507328033447, + "learning_rate": 3.497697469339215e-05, + "loss": 0.8516, + "step": 191 + }, + { + "epoch": 0.4314606741573034, + "grad_norm": 2.96820330619812, + "learning_rate": 3.497329726391606e-05, + "loss": 0.5479, + "step": 192 + }, + { + "epoch": 0.4337078651685393, + "grad_norm": 5.242271423339844, + "learning_rate": 3.496934779119175e-05, + "loss": 0.8642, + "step": 193 + }, + { + "epoch": 0.43595505617977526, + "grad_norm": 2.740006685256958, + "learning_rate": 3.496512635211069e-05, + "loss": 0.3121, + "step": 194 + }, + { + "epoch": 0.43820224719101125, + "grad_norm": 4.162242889404297, + "learning_rate": 3.496063302885921e-05, + "loss": 0.6932, + "step": 195 + }, + { + "epoch": 0.4404494382022472, + "grad_norm": 0.632938027381897, + "learning_rate": 3.495586790891689e-05, + "loss": 0.0647, + "step": 196 + }, + { + "epoch": 0.44269662921348313, + "grad_norm": 4.595058917999268, + "learning_rate": 3.495083108505487e-05, + "loss": 0.8173, + "step": 197 + }, + { + "epoch": 0.4449438202247191, + "grad_norm": 3.102372646331787, + "learning_rate": 3.494552265533404e-05, + "loss": 0.3122, + "step": 198 + }, + { + "epoch": 0.44719101123595506, + "grad_norm": 4.9895830154418945, + "learning_rate": 3.493994272310313e-05, + "loss": 0.7852, + "step": 199 + }, + { + "epoch": 0.449438202247191, + "grad_norm": 4.032258987426758, + "learning_rate": 3.493409139699669e-05, + "loss": 0.811, + "step": 200 + }, + { + "epoch": 0.451685393258427, + "grad_norm": 4.17324161529541, + "learning_rate": 3.4927968790932973e-05, + "loss": 0.7564, + "step": 201 + }, + { + "epoch": 0.45393258426966293, + "grad_norm": 0.49707159399986267, + "learning_rate": 3.492157502411174e-05, + "loss": 0.0541, + "step": 202 + }, + { + "epoch": 0.45617977528089887, + "grad_norm": 3.847059965133667, + "learning_rate": 3.491491022101194e-05, + "loss": 0.9085, + "step": 203 + }, + { + "epoch": 0.4584269662921348, + "grad_norm": 4.565647602081299, + "learning_rate": 3.4907974511389224e-05, + "loss": 0.8416, + "step": 204 + }, + { + "epoch": 0.4606741573033708, + "grad_norm": 0.8872150778770447, + "learning_rate": 3.4900768030273515e-05, + "loss": 0.0569, + "step": 205 + }, + { + "epoch": 0.46292134831460674, + "grad_norm": 3.2797999382019043, + "learning_rate": 3.4893290917966305e-05, + "loss": 0.7998, + "step": 206 + }, + { + "epoch": 0.4651685393258427, + "grad_norm": 5.683195114135742, + "learning_rate": 3.4885543320037956e-05, + "loss": 0.7218, + "step": 207 + }, + { + "epoch": 0.46741573033707867, + "grad_norm": 5.348382949829102, + "learning_rate": 3.4877525387324844e-05, + "loss": 0.9292, + "step": 208 + }, + { + "epoch": 0.4696629213483146, + "grad_norm": 4.3047099113464355, + "learning_rate": 3.486923727592647e-05, + "loss": 0.8279, + "step": 209 + }, + { + "epoch": 0.47191011235955055, + "grad_norm": 4.425166130065918, + "learning_rate": 3.486067914720236e-05, + "loss": 0.8452, + "step": 210 + }, + { + "epoch": 0.47415730337078654, + "grad_norm": 5.7947916984558105, + "learning_rate": 3.485185116776896e-05, + "loss": 1.1099, + "step": 211 + }, + { + "epoch": 0.4764044943820225, + "grad_norm": 4.257087230682373, + "learning_rate": 3.4842753509496385e-05, + "loss": 0.9436, + "step": 212 + }, + { + "epoch": 0.4786516853932584, + "grad_norm": 4.357375144958496, + "learning_rate": 3.483338634950507e-05, + "loss": 0.8389, + "step": 213 + }, + { + "epoch": 0.48089887640449436, + "grad_norm": 3.666268825531006, + "learning_rate": 3.482374987016233e-05, + "loss": 0.3297, + "step": 214 + }, + { + "epoch": 0.48314606741573035, + "grad_norm": 3.0593607425689697, + "learning_rate": 3.481384425907879e-05, + "loss": 0.8098, + "step": 215 + }, + { + "epoch": 0.4853932584269663, + "grad_norm": 0.4539957344532013, + "learning_rate": 3.480366970910476e-05, + "loss": 0.0386, + "step": 216 + }, + { + "epoch": 0.48764044943820223, + "grad_norm": 3.3102784156799316, + "learning_rate": 3.479322641832646e-05, + "loss": 0.7752, + "step": 217 + }, + { + "epoch": 0.4898876404494382, + "grad_norm": 3.8798298835754395, + "learning_rate": 3.4782514590062165e-05, + "loss": 0.8071, + "step": 218 + }, + { + "epoch": 0.49213483146067416, + "grad_norm": 6.300197124481201, + "learning_rate": 3.4771534432858255e-05, + "loss": 2.571, + "step": 219 + }, + { + "epoch": 0.4943820224719101, + "grad_norm": 4.163381099700928, + "learning_rate": 3.4760286160485145e-05, + "loss": 0.5912, + "step": 220 + }, + { + "epoch": 0.4966292134831461, + "grad_norm": 3.5834686756134033, + "learning_rate": 3.474876999193314e-05, + "loss": 0.3792, + "step": 221 + }, + { + "epoch": 0.49887640449438203, + "grad_norm": 4.494593143463135, + "learning_rate": 3.473698615140816e-05, + "loss": 0.7456, + "step": 222 + }, + { + "epoch": 0.501123595505618, + "grad_norm": 3.909142017364502, + "learning_rate": 3.4724934868327366e-05, + "loss": 0.7207, + "step": 223 + }, + { + "epoch": 0.503370786516854, + "grad_norm": 3.0387282371520996, + "learning_rate": 3.47126163773147e-05, + "loss": 0.3254, + "step": 224 + }, + { + "epoch": 0.5056179775280899, + "grad_norm": 0.6529088616371155, + "learning_rate": 3.4700030918196344e-05, + "loss": 0.0461, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8303268551826477, + "eval_VitaminC_cosine_ap": 0.5509523400010791, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.2634955048561096, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 281.011474609375, + "eval_VitaminC_dot_ap": 0.5281394234221073, + "eval_VitaminC_dot_f1": 0.6711772665764546, + "eval_VitaminC_dot_f1_threshold": 141.11529541015625, + "eval_VitaminC_dot_precision": 0.5081967213114754, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 11.405111312866211, + "eval_VitaminC_euclidean_ap": 0.5573376843815556, + "eval_VitaminC_euclidean_f1": 0.6640211640211641, + "eval_VitaminC_euclidean_f1_threshold": 24.63976287841797, + "eval_VitaminC_euclidean_precision": 0.497029702970297, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 349.33441162109375, + "eval_VitaminC_manhattan_ap": 0.5561637270496671, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 505.0340270996094, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 349.33441162109375, + "eval_VitaminC_max_ap": 0.5573376843815556, + "eval_VitaminC_max_f1": 0.6711772665764546, + "eval_VitaminC_max_f1_threshold": 505.0340270996094, + "eval_VitaminC_max_precision": 0.5081967213114754, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5573376843815556, + "eval_sts-test_pearson_cosine": 0.8848200869109313, + "eval_sts-test_pearson_dot": 0.8723563516714744, + "eval_sts-test_pearson_euclidean": 0.9070688973489409, + "eval_sts-test_pearson_manhattan": 0.9073961699007848, + "eval_sts-test_pearson_max": 0.9073961699007848, + "eval_sts-test_spearman_cosine": 0.9050875937031079, + "eval_sts-test_spearman_dot": 0.8699468894518183, + "eval_sts-test_spearman_euclidean": 0.9020747597811932, + "eval_sts-test_spearman_manhattan": 0.9019608230696907, + "eval_sts-test_spearman_max": 0.9050875937031079, + "eval_vitaminc-pairs_loss": 1.4897230863571167, + "eval_vitaminc-pairs_runtime": 1.8927, + "eval_vitaminc-pairs_samples_per_second": 57.062, + "eval_vitaminc-pairs_steps_per_second": 1.057, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_negation-triplets_loss": 0.9457363486289978, + "eval_negation-triplets_runtime": 0.3019, + "eval_negation-triplets_samples_per_second": 212.002, + "eval_negation-triplets_steps_per_second": 3.313, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_scitail-pairs-pos_loss": 0.07606112211942673, + "eval_scitail-pairs-pos_runtime": 0.3972, + "eval_scitail-pairs-pos_samples_per_second": 135.938, + "eval_scitail-pairs-pos_steps_per_second": 2.517, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_scitail-pairs-qa_loss": 0.001212431932799518, + "eval_scitail-pairs-qa_runtime": 0.5348, + "eval_scitail-pairs-qa_samples_per_second": 239.347, + "eval_scitail-pairs-qa_steps_per_second": 3.74, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_xsum-pairs_loss": 0.02758924476802349, + "eval_xsum-pairs_runtime": 2.767, + "eval_xsum-pairs_samples_per_second": 46.26, + "eval_xsum-pairs_steps_per_second": 0.723, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_sciq_pairs_loss": 0.016450434923171997, + "eval_sciq_pairs_runtime": 2.8812, + "eval_sciq_pairs_samples_per_second": 44.426, + "eval_sciq_pairs_steps_per_second": 0.694, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_qasc_pairs_loss": 0.09214109182357788, + "eval_qasc_pairs_runtime": 0.6597, + "eval_qasc_pairs_samples_per_second": 194.029, + "eval_qasc_pairs_steps_per_second": 3.032, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_openbookqa_pairs_loss": 0.7429620623588562, + "eval_openbookqa_pairs_runtime": 0.5947, + "eval_openbookqa_pairs_samples_per_second": 215.22, + "eval_openbookqa_pairs_steps_per_second": 3.363, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_msmarco_pairs_loss": 0.17871831357479095, + "eval_msmarco_pairs_runtime": 1.5003, + "eval_msmarco_pairs_samples_per_second": 85.314, + "eval_msmarco_pairs_steps_per_second": 1.333, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_nq_pairs_loss": 0.09803248196840286, + "eval_nq_pairs_runtime": 2.3587, + "eval_nq_pairs_samples_per_second": 54.267, + "eval_nq_pairs_steps_per_second": 0.848, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_trivia_pairs_loss": 0.5323590636253357, + "eval_trivia_pairs_runtime": 3.6206, + "eval_trivia_pairs_samples_per_second": 35.354, + "eval_trivia_pairs_steps_per_second": 0.552, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_gooaq_pairs_loss": 0.2667708098888397, + "eval_gooaq_pairs_runtime": 0.9171, + "eval_gooaq_pairs_samples_per_second": 139.573, + "eval_gooaq_pairs_steps_per_second": 2.181, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_paws-pos_loss": 0.0236118845641613, + "eval_paws-pos_runtime": 0.6973, + "eval_paws-pos_samples_per_second": 183.563, + "eval_paws-pos_steps_per_second": 2.868, + "step": 225 + }, + { + "epoch": 0.5078651685393258, + "grad_norm": 3.5867371559143066, + "learning_rate": 3.4687178735995997e-05, + "loss": 0.347, + "step": 226 + }, + { + "epoch": 0.5101123595505618, + "grad_norm": 0.37994861602783203, + "learning_rate": 3.467406008093016e-05, + "loss": 0.0417, + "step": 227 + }, + { + "epoch": 0.5123595505617977, + "grad_norm": 4.081336975097656, + "learning_rate": 3.466067520840322e-05, + "loss": 0.7783, + "step": 228 + }, + { + "epoch": 0.5146067415730337, + "grad_norm": 4.306976795196533, + "learning_rate": 3.46470243790025e-05, + "loss": 0.9027, + "step": 229 + }, + { + "epoch": 0.5168539325842697, + "grad_norm": 4.0280022621154785, + "learning_rate": 3.4633107858493206e-05, + "loss": 0.7166, + "step": 230 + }, + { + "epoch": 0.5191011235955056, + "grad_norm": 3.4807679653167725, + "learning_rate": 3.461892591781319e-05, + "loss": 0.705, + "step": 231 + }, + { + "epoch": 0.5213483146067416, + "grad_norm": 4.166563510894775, + "learning_rate": 3.4604478833067756e-05, + "loss": 0.8425, + "step": 232 + }, + { + "epoch": 0.5235955056179775, + "grad_norm": 3.828537940979004, + "learning_rate": 3.4589766885524204e-05, + "loss": 0.5362, + "step": 233 + }, + { + "epoch": 0.5258426966292135, + "grad_norm": 4.316190242767334, + "learning_rate": 3.4574790361606435e-05, + "loss": 0.7869, + "step": 234 + }, + { + "epoch": 0.5280898876404494, + "grad_norm": 4.244805335998535, + "learning_rate": 3.4559549552889285e-05, + "loss": 0.88, + "step": 235 + }, + { + "epoch": 0.5303370786516854, + "grad_norm": 4.208700656890869, + "learning_rate": 3.454404475609294e-05, + "loss": 0.8077, + "step": 236 + }, + { + "epoch": 0.5325842696629214, + "grad_norm": 3.1473183631896973, + "learning_rate": 3.4528276273077094e-05, + "loss": 0.8145, + "step": 237 + }, + { + "epoch": 0.5348314606741573, + "grad_norm": 3.798297166824341, + "learning_rate": 3.4512244410835094e-05, + "loss": 0.78, + "step": 238 + }, + { + "epoch": 0.5370786516853933, + "grad_norm": 0.535529375076294, + "learning_rate": 3.449594948148796e-05, + "loss": 0.0536, + "step": 239 + }, + { + "epoch": 0.5393258426966292, + "grad_norm": 3.2119970321655273, + "learning_rate": 3.447939180227833e-05, + "loss": 0.7975, + "step": 240 + }, + { + "epoch": 0.5415730337078651, + "grad_norm": 4.725860118865967, + "learning_rate": 3.446257169556425e-05, + "loss": 0.8932, + "step": 241 + }, + { + "epoch": 0.5438202247191011, + "grad_norm": 3.867676258087158, + "learning_rate": 3.4445489488812906e-05, + "loss": 0.3386, + "step": 242 + }, + { + "epoch": 0.5460674157303371, + "grad_norm": 3.981114387512207, + "learning_rate": 3.4428145514594274e-05, + "loss": 0.7741, + "step": 243 + }, + { + "epoch": 0.5483146067415731, + "grad_norm": 4.034990310668945, + "learning_rate": 3.4410540110574616e-05, + "loss": 0.7439, + "step": 244 + }, + { + "epoch": 0.550561797752809, + "grad_norm": 4.209812641143799, + "learning_rate": 3.4392673619509916e-05, + "loss": 0.7999, + "step": 245 + }, + { + "epoch": 0.5528089887640449, + "grad_norm": 3.942631244659424, + "learning_rate": 3.437454638923921e-05, + "loss": 0.8542, + "step": 246 + }, + { + "epoch": 0.5550561797752809, + "grad_norm": 4.087955951690674, + "learning_rate": 3.435615877267783e-05, + "loss": 0.6992, + "step": 247 + }, + { + "epoch": 0.5573033707865168, + "grad_norm": 3.885822057723999, + "learning_rate": 3.4337511127810466e-05, + "loss": 0.8579, + "step": 248 + }, + { + "epoch": 0.5595505617977528, + "grad_norm": 5.198770523071289, + "learning_rate": 3.431860381768431e-05, + "loss": 1.0221, + "step": 249 + }, + { + "epoch": 0.5617977528089888, + "grad_norm": 4.321418285369873, + "learning_rate": 3.4299437210401866e-05, + "loss": 0.699, + "step": 250 + }, + { + "epoch": 0.5640449438202247, + "grad_norm": 3.1992154121398926, + "learning_rate": 3.4280011679113884e-05, + "loss": 0.8523, + "step": 251 + }, + { + "epoch": 0.5662921348314607, + "grad_norm": 4.94226598739624, + "learning_rate": 3.4260327602012027e-05, + "loss": 1.0307, + "step": 252 + }, + { + "epoch": 0.5685393258426966, + "grad_norm": 3.958935499191284, + "learning_rate": 3.424038536232154e-05, + "loss": 0.846, + "step": 253 + }, + { + "epoch": 0.5707865168539326, + "grad_norm": 4.023487091064453, + "learning_rate": 3.4220185348293775e-05, + "loss": 0.8361, + "step": 254 + }, + { + "epoch": 0.5730337078651685, + "grad_norm": 3.275102138519287, + "learning_rate": 3.4199727953198665e-05, + "loss": 0.8224, + "step": 255 + }, + { + "epoch": 0.5752808988764045, + "grad_norm": 3.6130261421203613, + "learning_rate": 3.417901357531701e-05, + "loss": 0.5301, + "step": 256 + }, + { + "epoch": 0.5775280898876405, + "grad_norm": 4.571770668029785, + "learning_rate": 3.415804261793277e-05, + "loss": 0.3795, + "step": 257 + }, + { + "epoch": 0.5797752808988764, + "grad_norm": 3.1884663105010986, + "learning_rate": 3.413681548932521e-05, + "loss": 0.5434, + "step": 258 + }, + { + "epoch": 0.5820224719101124, + "grad_norm": 4.795211315155029, + "learning_rate": 3.411533260276091e-05, + "loss": 0.847, + "step": 259 + }, + { + "epoch": 0.5842696629213483, + "grad_norm": 4.761318206787109, + "learning_rate": 3.409359437648579e-05, + "loss": 0.7323, + "step": 260 + }, + { + "epoch": 0.5865168539325842, + "grad_norm": 4.4683098793029785, + "learning_rate": 3.407160123371687e-05, + "loss": 0.6606, + "step": 261 + }, + { + "epoch": 0.5887640449438202, + "grad_norm": 0.7677178382873535, + "learning_rate": 3.404935360263415e-05, + "loss": 0.0543, + "step": 262 + }, + { + "epoch": 0.5910112359550562, + "grad_norm": 4.110381126403809, + "learning_rate": 3.4026851916372166e-05, + "loss": 0.6709, + "step": 263 + }, + { + "epoch": 0.5932584269662922, + "grad_norm": 4.766375541687012, + "learning_rate": 3.400409661301162e-05, + "loss": 0.809, + "step": 264 + }, + { + "epoch": 0.5955056179775281, + "grad_norm": 5.389264106750488, + "learning_rate": 3.398108813557082e-05, + "loss": 1.0391, + "step": 265 + }, + { + "epoch": 0.597752808988764, + "grad_norm": 3.8780810832977295, + "learning_rate": 3.3957826931997094e-05, + "loss": 0.7396, + "step": 266 + }, + { + "epoch": 0.6, + "grad_norm": 4.399974822998047, + "learning_rate": 3.393431345515801e-05, + "loss": 0.7839, + "step": 267 + }, + { + "epoch": 0.6022471910112359, + "grad_norm": 3.2098612785339355, + "learning_rate": 3.391054816283262e-05, + "loss": 0.3054, + "step": 268 + }, + { + "epoch": 0.604494382022472, + "grad_norm": 3.606182098388672, + "learning_rate": 3.3886531517702505e-05, + "loss": 0.5258, + "step": 269 + }, + { + "epoch": 0.6067415730337079, + "grad_norm": 4.3564934730529785, + "learning_rate": 3.3862263987342784e-05, + "loss": 0.7367, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_VitaminC_cosine_accuracy": 0.552734375, + "eval_VitaminC_cosine_accuracy_threshold": 0.814909815788269, + "eval_VitaminC_cosine_ap": 0.5506214433093293, + "eval_VitaminC_cosine_f1": 0.664886515353805, + "eval_VitaminC_cosine_f1_threshold": 0.3506072461605072, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 0.9920318725099602, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 316.90899658203125, + "eval_VitaminC_dot_ap": 0.5353657977329522, + "eval_VitaminC_dot_f1": 0.6666666666666667, + "eval_VitaminC_dot_f1_threshold": 155.67796325683594, + "eval_VitaminC_dot_precision": 0.506198347107438, + "eval_VitaminC_dot_recall": 0.9760956175298805, + "eval_VitaminC_euclidean_accuracy": 0.55078125, + "eval_VitaminC_euclidean_accuracy_threshold": 10.77621841430664, + "eval_VitaminC_euclidean_ap": 0.550546292530568, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 24.22284698486328, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 335.6986389160156, + "eval_VitaminC_manhattan_ap": 0.5497325043939846, + "eval_VitaminC_manhattan_f1": 0.6640211640211641, + "eval_VitaminC_manhattan_f1_threshold": 513.494873046875, + "eval_VitaminC_manhattan_precision": 0.497029702970297, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.5546875, + "eval_VitaminC_max_accuracy_threshold": 335.6986389160156, + "eval_VitaminC_max_ap": 0.5506214433093293, + "eval_VitaminC_max_f1": 0.6666666666666667, + "eval_VitaminC_max_f1_threshold": 513.494873046875, + "eval_VitaminC_max_precision": 0.506198347107438, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5506214433093293, + "eval_sts-test_pearson_cosine": 0.8848372816940555, + "eval_sts-test_pearson_dot": 0.8774995772730847, + "eval_sts-test_pearson_euclidean": 0.9058906663416005, + "eval_sts-test_pearson_manhattan": 0.9066316554236529, + "eval_sts-test_pearson_max": 0.9066316554236529, + "eval_sts-test_spearman_cosine": 0.9085018016884417, + "eval_sts-test_spearman_dot": 0.8776881864036095, + "eval_sts-test_spearman_euclidean": 0.903223569412372, + "eval_sts-test_spearman_manhattan": 0.9037578547221237, + "eval_sts-test_spearman_max": 0.9085018016884417, + "eval_vitaminc-pairs_loss": 1.4935871362686157, + "eval_vitaminc-pairs_runtime": 1.8963, + "eval_vitaminc-pairs_samples_per_second": 56.952, + "eval_vitaminc-pairs_steps_per_second": 1.055, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_negation-triplets_loss": 0.9505463242530823, + "eval_negation-triplets_runtime": 0.3041, + "eval_negation-triplets_samples_per_second": 210.485, + "eval_negation-triplets_steps_per_second": 3.289, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_scitail-pairs-pos_loss": 0.09635873883962631, + "eval_scitail-pairs-pos_runtime": 0.4048, + "eval_scitail-pairs-pos_samples_per_second": 133.396, + "eval_scitail-pairs-pos_steps_per_second": 2.47, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_scitail-pairs-qa_loss": 0.0009468490607105196, + "eval_scitail-pairs-qa_runtime": 0.5341, + "eval_scitail-pairs-qa_samples_per_second": 239.65, + "eval_scitail-pairs-qa_steps_per_second": 3.745, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_xsum-pairs_loss": 0.026903513818979263, + "eval_xsum-pairs_runtime": 2.7518, + "eval_xsum-pairs_samples_per_second": 46.514, + "eval_xsum-pairs_steps_per_second": 0.727, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_sciq_pairs_loss": 0.01619444414973259, + "eval_sciq_pairs_runtime": 2.8856, + "eval_sciq_pairs_samples_per_second": 44.358, + "eval_sciq_pairs_steps_per_second": 0.693, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_qasc_pairs_loss": 0.09130185097455978, + "eval_qasc_pairs_runtime": 0.6645, + "eval_qasc_pairs_samples_per_second": 192.631, + "eval_qasc_pairs_steps_per_second": 3.01, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_openbookqa_pairs_loss": 0.7336423397064209, + "eval_openbookqa_pairs_runtime": 0.5935, + "eval_openbookqa_pairs_samples_per_second": 215.687, + "eval_openbookqa_pairs_steps_per_second": 3.37, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_msmarco_pairs_loss": 0.15868164598941803, + "eval_msmarco_pairs_runtime": 1.5086, + "eval_msmarco_pairs_samples_per_second": 84.844, + "eval_msmarco_pairs_steps_per_second": 1.326, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_nq_pairs_loss": 0.10780799388885498, + "eval_nq_pairs_runtime": 2.3746, + "eval_nq_pairs_samples_per_second": 53.905, + "eval_nq_pairs_steps_per_second": 0.842, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_trivia_pairs_loss": 0.49691149592399597, + "eval_trivia_pairs_runtime": 3.5992, + "eval_trivia_pairs_samples_per_second": 35.563, + "eval_trivia_pairs_steps_per_second": 0.556, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_gooaq_pairs_loss": 0.3025541603565216, + "eval_gooaq_pairs_runtime": 0.9181, + "eval_gooaq_pairs_samples_per_second": 139.423, + "eval_gooaq_pairs_steps_per_second": 2.178, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_paws-pos_loss": 0.024440350010991096, + "eval_paws-pos_runtime": 0.7046, + "eval_paws-pos_samples_per_second": 181.67, + "eval_paws-pos_steps_per_second": 2.839, + "step": 270 + }, + { + "epoch": 0.6089887640449438, + "grad_norm": 3.21183705329895, + "learning_rate": 3.383774604421301e-05, + "loss": 0.747, + "step": 271 + }, + { + "epoch": 0.6112359550561798, + "grad_norm": 4.403411865234375, + "learning_rate": 3.3812978165647975e-05, + "loss": 0.7855, + "step": 272 + }, + { + "epoch": 0.6134831460674157, + "grad_norm": 0.46612274646759033, + "learning_rate": 3.3787960833848405e-05, + "loss": 0.0473, + "step": 273 + }, + { + "epoch": 0.6157303370786517, + "grad_norm": 3.30610990524292, + "learning_rate": 3.3762694535871584e-05, + "loss": 0.4378, + "step": 274 + }, + { + "epoch": 0.6179775280898876, + "grad_norm": 3.7408640384674072, + "learning_rate": 3.373717976362187e-05, + "loss": 0.8767, + "step": 275 + }, + { + "epoch": 0.6202247191011236, + "grad_norm": 5.345012187957764, + "learning_rate": 3.3711417013841105e-05, + "loss": 1.0345, + "step": 276 + }, + { + "epoch": 0.6224719101123596, + "grad_norm": 3.518765449523926, + "learning_rate": 3.368540678809897e-05, + "loss": 0.5182, + "step": 277 + }, + { + "epoch": 0.6247191011235955, + "grad_norm": 6.666887283325195, + "learning_rate": 3.3659149592783186e-05, + "loss": 2.5949, + "step": 278 + }, + { + "epoch": 0.6269662921348315, + "grad_norm": 3.197411298751831, + "learning_rate": 3.363264593908969e-05, + "loss": 0.833, + "step": 279 + }, + { + "epoch": 0.6292134831460674, + "grad_norm": 0.6012090444564819, + "learning_rate": 3.360589634301267e-05, + "loss": 0.0778, + "step": 280 + }, + { + "epoch": 0.6314606741573033, + "grad_norm": 4.5016188621521, + "learning_rate": 3.357890132533449e-05, + "loss": 0.8048, + "step": 281 + }, + { + "epoch": 0.6337078651685393, + "grad_norm": 3.865889072418213, + "learning_rate": 3.35516614116156e-05, + "loss": 0.7524, + "step": 282 + }, + { + "epoch": 0.6359550561797753, + "grad_norm": 3.2998361587524414, + "learning_rate": 3.3524177132184266e-05, + "loss": 0.3246, + "step": 283 + }, + { + "epoch": 0.6382022471910113, + "grad_norm": 0.6418587565422058, + "learning_rate": 3.349644902212628e-05, + "loss": 0.0728, + "step": 284 + }, + { + "epoch": 0.6404494382022472, + "grad_norm": 5.772351264953613, + "learning_rate": 3.34684776212745e-05, + "loss": 2.3619, + "step": 285 + }, + { + "epoch": 0.6426966292134831, + "grad_norm": 3.769488573074341, + "learning_rate": 3.3440263474198376e-05, + "loss": 0.7464, + "step": 286 + }, + { + "epoch": 0.6449438202247191, + "grad_norm": 4.559601783752441, + "learning_rate": 3.3411807130193325e-05, + "loss": 0.6691, + "step": 287 + }, + { + "epoch": 0.647191011235955, + "grad_norm": 0.45337462425231934, + "learning_rate": 3.338310914327005e-05, + "loss": 0.059, + "step": 288 + }, + { + "epoch": 0.6494382022471911, + "grad_norm": 4.7184553146362305, + "learning_rate": 3.3354170072143766e-05, + "loss": 0.7841, + "step": 289 + }, + { + "epoch": 0.651685393258427, + "grad_norm": 3.886216640472412, + "learning_rate": 3.332499048022328e-05, + "loss": 0.647, + "step": 290 + }, + { + "epoch": 0.6539325842696629, + "grad_norm": 4.497567176818848, + "learning_rate": 3.329557093560006e-05, + "loss": 0.8814, + "step": 291 + }, + { + "epoch": 0.6561797752808989, + "grad_norm": 3.995391368865967, + "learning_rate": 3.326591201103716e-05, + "loss": 0.7247, + "step": 292 + }, + { + "epoch": 0.6584269662921348, + "grad_norm": 0.4348815083503723, + "learning_rate": 3.323601428395809e-05, + "loss": 0.059, + "step": 293 + }, + { + "epoch": 0.6606741573033708, + "grad_norm": 3.6197896003723145, + "learning_rate": 3.320587833643554e-05, + "loss": 0.8317, + "step": 294 + }, + { + "epoch": 0.6629213483146067, + "grad_norm": 4.4088215827941895, + "learning_rate": 3.317550475518006e-05, + "loss": 0.8548, + "step": 295 + }, + { + "epoch": 0.6651685393258427, + "grad_norm": 4.541014194488525, + "learning_rate": 3.314489413152867e-05, + "loss": 0.9213, + "step": 296 + }, + { + "epoch": 0.6674157303370787, + "grad_norm": 3.067857265472412, + "learning_rate": 3.311404706143329e-05, + "loss": 0.6923, + "step": 297 + }, + { + "epoch": 0.6696629213483146, + "grad_norm": 4.037753582000732, + "learning_rate": 3.3082964145449174e-05, + "loss": 0.7777, + "step": 298 + }, + { + "epoch": 0.6719101123595506, + "grad_norm": 4.280182838439941, + "learning_rate": 3.305164598872322e-05, + "loss": 0.7496, + "step": 299 + }, + { + "epoch": 0.6741573033707865, + "grad_norm": 4.357325077056885, + "learning_rate": 3.302009320098218e-05, + "loss": 0.7636, + "step": 300 + }, + { + "epoch": 0.6764044943820224, + "grad_norm": 4.007940292358398, + "learning_rate": 3.2988306396520775e-05, + "loss": 0.6867, + "step": 301 + }, + { + "epoch": 0.6786516853932584, + "grad_norm": 0.8544747233390808, + "learning_rate": 3.295628619418977e-05, + "loss": 0.0506, + "step": 302 + }, + { + "epoch": 0.6808988764044944, + "grad_norm": 3.34498929977417, + "learning_rate": 3.292403321738387e-05, + "loss": 0.3346, + "step": 303 + }, + { + "epoch": 0.6831460674157304, + "grad_norm": 2.441420316696167, + "learning_rate": 3.289154809402967e-05, + "loss": 0.2485, + "step": 304 + }, + { + "epoch": 0.6853932584269663, + "grad_norm": 4.533839702606201, + "learning_rate": 3.285883145657334e-05, + "loss": 0.8508, + "step": 305 + }, + { + "epoch": 0.6876404494382022, + "grad_norm": 3.2033944129943848, + "learning_rate": 3.2825883941968346e-05, + "loss": 0.8464, + "step": 306 + }, + { + "epoch": 0.6898876404494382, + "grad_norm": 3.6305220127105713, + "learning_rate": 3.279270619166309e-05, + "loss": 0.3385, + "step": 307 + }, + { + "epoch": 0.6921348314606741, + "grad_norm": 4.438405990600586, + "learning_rate": 3.2759298851588336e-05, + "loss": 0.8837, + "step": 308 + }, + { + "epoch": 0.6943820224719102, + "grad_norm": 4.252586841583252, + "learning_rate": 3.272566257214474e-05, + "loss": 0.9019, + "step": 309 + }, + { + "epoch": 0.6966292134831461, + "grad_norm": 4.231752872467041, + "learning_rate": 3.2691798008190096e-05, + "loss": 0.6922, + "step": 310 + }, + { + "epoch": 0.698876404494382, + "grad_norm": 3.862682342529297, + "learning_rate": 3.265770581902662e-05, + "loss": 0.6348, + "step": 311 + }, + { + "epoch": 0.701123595505618, + "grad_norm": 3.783026933670044, + "learning_rate": 3.262338666838813e-05, + "loss": 0.7522, + "step": 312 + }, + { + "epoch": 0.7033707865168539, + "grad_norm": 4.141933917999268, + "learning_rate": 3.25888412244271e-05, + "loss": 0.7843, + "step": 313 + }, + { + "epoch": 0.7056179775280899, + "grad_norm": 0.7638006210327148, + "learning_rate": 3.2554070159701684e-05, + "loss": 0.0493, + "step": 314 + }, + { + "epoch": 0.7078651685393258, + "grad_norm": 3.7285079956054688, + "learning_rate": 3.2519074151162564e-05, + "loss": 0.357, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8318675756454468, + "eval_VitaminC_cosine_ap": 0.553255462027648, + "eval_VitaminC_cosine_f1": 0.6666666666666666, + "eval_VitaminC_cosine_f1_threshold": 0.3080925941467285, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 284.4936218261719, + "eval_VitaminC_dot_ap": 0.5335304755231123, + "eval_VitaminC_dot_f1": 0.6675531914893617, + "eval_VitaminC_dot_f1_threshold": 117.11366271972656, + "eval_VitaminC_dot_precision": 0.500998003992016, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 14.916669845581055, + "eval_VitaminC_euclidean_ap": 0.5560392780320775, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 23.758323669433594, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 303.769775390625, + "eval_VitaminC_manhattan_ap": 0.5575735035337728, + "eval_VitaminC_manhattan_f1": 0.6666666666666666, + "eval_VitaminC_manhattan_f1_threshold": 500.6726989746094, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 303.769775390625, + "eval_VitaminC_max_ap": 0.5575735035337728, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 500.6726989746094, + "eval_VitaminC_max_precision": 0.500998003992016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5575735035337728, + "eval_sts-test_pearson_cosine": 0.884017793393225, + "eval_sts-test_pearson_dot": 0.8725802033594147, + "eval_sts-test_pearson_euclidean": 0.9065592531799239, + "eval_sts-test_pearson_manhattan": 0.9070236641674441, + "eval_sts-test_pearson_max": 0.9070236641674441, + "eval_sts-test_spearman_cosine": 0.9067846957888538, + "eval_sts-test_spearman_dot": 0.8716365180769119, + "eval_sts-test_spearman_euclidean": 0.9026938039800204, + "eval_sts-test_spearman_manhattan": 0.903306941012344, + "eval_sts-test_spearman_max": 0.9067846957888538, + "eval_vitaminc-pairs_loss": 1.4885247945785522, + "eval_vitaminc-pairs_runtime": 1.9137, + "eval_vitaminc-pairs_samples_per_second": 56.436, + "eval_vitaminc-pairs_steps_per_second": 1.045, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_negation-triplets_loss": 0.9597576856613159, + "eval_negation-triplets_runtime": 0.3023, + "eval_negation-triplets_samples_per_second": 211.742, + "eval_negation-triplets_steps_per_second": 3.308, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_scitail-pairs-pos_loss": 0.09951130300760269, + "eval_scitail-pairs-pos_runtime": 0.3896, + "eval_scitail-pairs-pos_samples_per_second": 138.608, + "eval_scitail-pairs-pos_steps_per_second": 2.567, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_scitail-pairs-qa_loss": 0.0010157548822462559, + "eval_scitail-pairs-qa_runtime": 0.5373, + "eval_scitail-pairs-qa_samples_per_second": 238.245, + "eval_scitail-pairs-qa_steps_per_second": 3.723, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_xsum-pairs_loss": 0.027823584154248238, + "eval_xsum-pairs_runtime": 2.7408, + "eval_xsum-pairs_samples_per_second": 46.701, + "eval_xsum-pairs_steps_per_second": 0.73, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_sciq_pairs_loss": 0.015241424553096294, + "eval_sciq_pairs_runtime": 2.8458, + "eval_sciq_pairs_samples_per_second": 44.978, + "eval_sciq_pairs_steps_per_second": 0.703, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_qasc_pairs_loss": 0.09173130989074707, + "eval_qasc_pairs_runtime": 0.6608, + "eval_qasc_pairs_samples_per_second": 193.694, + "eval_qasc_pairs_steps_per_second": 3.026, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_openbookqa_pairs_loss": 0.6921954154968262, + "eval_openbookqa_pairs_runtime": 0.5893, + "eval_openbookqa_pairs_samples_per_second": 217.196, + "eval_openbookqa_pairs_steps_per_second": 3.394, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_msmarco_pairs_loss": 0.15177518129348755, + "eval_msmarco_pairs_runtime": 1.494, + "eval_msmarco_pairs_samples_per_second": 85.673, + "eval_msmarco_pairs_steps_per_second": 1.339, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_nq_pairs_loss": 0.10136909037828445, + "eval_nq_pairs_runtime": 2.3524, + "eval_nq_pairs_samples_per_second": 54.413, + "eval_nq_pairs_steps_per_second": 0.85, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_trivia_pairs_loss": 0.5301617980003357, + "eval_trivia_pairs_runtime": 3.5809, + "eval_trivia_pairs_samples_per_second": 35.745, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_gooaq_pairs_loss": 0.28424739837646484, + "eval_gooaq_pairs_runtime": 0.9167, + "eval_gooaq_pairs_samples_per_second": 139.635, + "eval_gooaq_pairs_steps_per_second": 2.182, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_paws-pos_loss": 0.023981213569641113, + "eval_paws-pos_runtime": 0.6966, + "eval_paws-pos_samples_per_second": 183.744, + "eval_paws-pos_steps_per_second": 2.871, + "step": 315 + }, + { + "epoch": 0.7101123595505618, + "grad_norm": 3.6374969482421875, + "learning_rate": 3.248385388013984e-05, + "loss": 0.841, + "step": 316 + }, + { + "epoch": 0.7123595505617978, + "grad_norm": 4.251607418060303, + "learning_rate": 3.2448410032329716e-05, + "loss": 0.5849, + "step": 317 + }, + { + "epoch": 0.7146067415730337, + "grad_norm": 4.323038101196289, + "learning_rate": 3.241274329778117e-05, + "loss": 0.6818, + "step": 318 + }, + { + "epoch": 0.7168539325842697, + "grad_norm": 4.027289867401123, + "learning_rate": 3.237685437088251e-05, + "loss": 0.8269, + "step": 319 + }, + { + "epoch": 0.7191011235955056, + "grad_norm": 3.014479875564575, + "learning_rate": 3.234074395034787e-05, + "loss": 0.6979, + "step": 320 + }, + { + "epoch": 0.7213483146067415, + "grad_norm": 3.5980277061462402, + "learning_rate": 3.2304412739203595e-05, + "loss": 0.3218, + "step": 321 + }, + { + "epoch": 0.7235955056179775, + "grad_norm": 3.2924134731292725, + "learning_rate": 3.226786144477456e-05, + "loss": 0.8206, + "step": 322 + }, + { + "epoch": 0.7258426966292135, + "grad_norm": 2.524231195449829, + "learning_rate": 3.2231090778670385e-05, + "loss": 0.2106, + "step": 323 + }, + { + "epoch": 0.7280898876404495, + "grad_norm": 5.464061260223389, + "learning_rate": 3.2194101456771604e-05, + "loss": 1.0524, + "step": 324 + }, + { + "epoch": 0.7303370786516854, + "grad_norm": 3.4692578315734863, + "learning_rate": 3.215689419921572e-05, + "loss": 0.3774, + "step": 325 + }, + { + "epoch": 0.7325842696629213, + "grad_norm": 4.947183132171631, + "learning_rate": 3.211946973038315e-05, + "loss": 0.9098, + "step": 326 + }, + { + "epoch": 0.7348314606741573, + "grad_norm": 4.432866096496582, + "learning_rate": 3.208182877888319e-05, + "loss": 0.7988, + "step": 327 + }, + { + "epoch": 0.7370786516853932, + "grad_norm": 4.585951328277588, + "learning_rate": 3.204397207753978e-05, + "loss": 0.7916, + "step": 328 + }, + { + "epoch": 0.7393258426966293, + "grad_norm": 3.7288637161254883, + "learning_rate": 3.200590036337724e-05, + "loss": 0.6314, + "step": 329 + }, + { + "epoch": 0.7415730337078652, + "grad_norm": 3.840074300765991, + "learning_rate": 3.196761437760593e-05, + "loss": 0.8628, + "step": 330 + }, + { + "epoch": 0.7438202247191011, + "grad_norm": 0.6423048377037048, + "learning_rate": 3.192911486560784e-05, + "loss": 0.0688, + "step": 331 + }, + { + "epoch": 0.7460674157303371, + "grad_norm": 4.148509502410889, + "learning_rate": 3.1890402576922036e-05, + "loss": 0.7386, + "step": 332 + }, + { + "epoch": 0.748314606741573, + "grad_norm": 4.7345147132873535, + "learning_rate": 3.1851478265230103e-05, + "loss": 0.8458, + "step": 333 + }, + { + "epoch": 0.750561797752809, + "grad_norm": 0.695708155632019, + "learning_rate": 3.181234268834144e-05, + "loss": 0.0442, + "step": 334 + }, + { + "epoch": 0.7528089887640449, + "grad_norm": 3.434741735458374, + "learning_rate": 3.177299660817856e-05, + "loss": 0.317, + "step": 335 + }, + { + "epoch": 0.755056179775281, + "grad_norm": 3.306964874267578, + "learning_rate": 3.1733440790762176e-05, + "loss": 0.8087, + "step": 336 + }, + { + "epoch": 0.7573033707865169, + "grad_norm": 3.010828733444214, + "learning_rate": 3.169367600619637e-05, + "loss": 0.3398, + "step": 337 + }, + { + "epoch": 0.7595505617977528, + "grad_norm": 4.152151584625244, + "learning_rate": 3.1653703028653545e-05, + "loss": 0.699, + "step": 338 + }, + { + "epoch": 0.7617977528089888, + "grad_norm": 4.073326110839844, + "learning_rate": 3.161352263635937e-05, + "loss": 0.7901, + "step": 339 + }, + { + "epoch": 0.7640449438202247, + "grad_norm": 4.365633487701416, + "learning_rate": 3.157313561157764e-05, + "loss": 0.8072, + "step": 340 + }, + { + "epoch": 0.7662921348314606, + "grad_norm": 3.506556272506714, + "learning_rate": 3.153254274059501e-05, + "loss": 0.5939, + "step": 341 + }, + { + "epoch": 0.7685393258426966, + "grad_norm": 4.319092273712158, + "learning_rate": 3.149174481370575e-05, + "loss": 0.6933, + "step": 342 + }, + { + "epoch": 0.7707865168539326, + "grad_norm": 0.6184964179992676, + "learning_rate": 3.145074262519629e-05, + "loss": 0.0437, + "step": 343 + }, + { + "epoch": 0.7730337078651686, + "grad_norm": 4.866581916809082, + "learning_rate": 3.140953697332979e-05, + "loss": 0.9882, + "step": 344 + }, + { + "epoch": 0.7752808988764045, + "grad_norm": 3.9585559368133545, + "learning_rate": 3.136812866033063e-05, + "loss": 0.3707, + "step": 345 + }, + { + "epoch": 0.7775280898876404, + "grad_norm": 4.253391265869141, + "learning_rate": 3.132651849236871e-05, + "loss": 0.7103, + "step": 346 + }, + { + "epoch": 0.7797752808988764, + "grad_norm": 0.5847011208534241, + "learning_rate": 3.128470727954383e-05, + "loss": 0.0372, + "step": 347 + }, + { + "epoch": 0.7820224719101123, + "grad_norm": 0.5127836465835571, + "learning_rate": 3.124269583586989e-05, + "loss": 0.028, + "step": 348 + }, + { + "epoch": 0.7842696629213484, + "grad_norm": 4.145182132720947, + "learning_rate": 3.120048497925904e-05, + "loss": 0.7676, + "step": 349 + }, + { + "epoch": 0.7865168539325843, + "grad_norm": 4.833105087280273, + "learning_rate": 3.1158075531505755e-05, + "loss": 0.6754, + "step": 350 + }, + { + "epoch": 0.7887640449438202, + "grad_norm": 0.49345946311950684, + "learning_rate": 3.1115468318270844e-05, + "loss": 0.0439, + "step": 351 + }, + { + "epoch": 0.7910112359550562, + "grad_norm": 3.357720375061035, + "learning_rate": 3.107266416906538e-05, + "loss": 0.8039, + "step": 352 + }, + { + "epoch": 0.7932584269662921, + "grad_norm": 0.2371903359889984, + "learning_rate": 3.1029663917234514e-05, + "loss": 0.0104, + "step": 353 + }, + { + "epoch": 0.7955056179775281, + "grad_norm": 0.48881796002388, + "learning_rate": 3.098646839994132e-05, + "loss": 0.0555, + "step": 354 + }, + { + "epoch": 0.797752808988764, + "grad_norm": 3.3021090030670166, + "learning_rate": 3.094307845815042e-05, + "loss": 0.8646, + "step": 355 + }, + { + "epoch": 0.8, + "grad_norm": 3.0412533283233643, + "learning_rate": 3.0899494936611663e-05, + "loss": 0.7781, + "step": 356 + }, + { + "epoch": 0.802247191011236, + "grad_norm": 0.30917835235595703, + "learning_rate": 3.085571868384366e-05, + "loss": 0.011, + "step": 357 + }, + { + "epoch": 0.8044943820224719, + "grad_norm": 3.6957950592041016, + "learning_rate": 3.081175055211726e-05, + "loss": 0.3267, + "step": 358 + }, + { + "epoch": 0.8067415730337079, + "grad_norm": 7.202300071716309, + "learning_rate": 3.0767591397438974e-05, + "loss": 2.5281, + "step": 359 + }, + { + "epoch": 0.8089887640449438, + "grad_norm": 2.9833834171295166, + "learning_rate": 3.072324207953429e-05, + "loss": 0.301, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.6793336868286133, + "eval_VitaminC_cosine_ap": 0.5555632752592039, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.28029173612594604, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 265.5102844238281, + "eval_VitaminC_dot_ap": 0.5326105108889087, + "eval_VitaminC_dot_f1": 0.6675531914893617, + "eval_VitaminC_dot_f1_threshold": 106.37774658203125, + "eval_VitaminC_dot_precision": 0.500998003992016, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 15.296594619750977, + "eval_VitaminC_euclidean_ap": 0.5592294311948881, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 23.58568572998047, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 306.79913330078125, + "eval_VitaminC_manhattan_ap": 0.5598941655081213, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 512.0101318359375, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 306.79913330078125, + "eval_VitaminC_max_ap": 0.5598941655081213, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 512.0101318359375, + "eval_VitaminC_max_precision": 0.500998003992016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5598941655081213, + "eval_sts-test_pearson_cosine": 0.8832151520369376, + "eval_sts-test_pearson_dot": 0.8763916954110884, + "eval_sts-test_pearson_euclidean": 0.9046869354209082, + "eval_sts-test_pearson_manhattan": 0.9047119917370259, + "eval_sts-test_pearson_max": 0.9047119917370259, + "eval_sts-test_spearman_cosine": 0.9054341922225841, + "eval_sts-test_spearman_dot": 0.8786041104705073, + "eval_sts-test_spearman_euclidean": 0.9002407635868509, + "eval_sts-test_spearman_manhattan": 0.9006719867416183, + "eval_sts-test_spearman_max": 0.9054341922225841, + "eval_vitaminc-pairs_loss": 1.4290639162063599, + "eval_vitaminc-pairs_runtime": 1.8905, + "eval_vitaminc-pairs_samples_per_second": 57.128, + "eval_vitaminc-pairs_steps_per_second": 1.058, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_negation-triplets_loss": 0.9030703902244568, + "eval_negation-triplets_runtime": 0.2986, + "eval_negation-triplets_samples_per_second": 214.299, + "eval_negation-triplets_steps_per_second": 3.348, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_scitail-pairs-pos_loss": 0.10728535801172256, + "eval_scitail-pairs-pos_runtime": 0.3831, + "eval_scitail-pairs-pos_samples_per_second": 140.965, + "eval_scitail-pairs-pos_steps_per_second": 2.61, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_scitail-pairs-qa_loss": 0.0005650219391100109, + "eval_scitail-pairs-qa_runtime": 0.5259, + "eval_scitail-pairs-qa_samples_per_second": 243.397, + "eval_scitail-pairs-qa_steps_per_second": 3.803, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_xsum-pairs_loss": 0.025990577414631844, + "eval_xsum-pairs_runtime": 2.734, + "eval_xsum-pairs_samples_per_second": 46.818, + "eval_xsum-pairs_steps_per_second": 0.732, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_sciq_pairs_loss": 0.016017427667975426, + "eval_sciq_pairs_runtime": 2.8252, + "eval_sciq_pairs_samples_per_second": 45.307, + "eval_sciq_pairs_steps_per_second": 0.708, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_qasc_pairs_loss": 0.10250324755907059, + "eval_qasc_pairs_runtime": 0.6511, + "eval_qasc_pairs_samples_per_second": 196.585, + "eval_qasc_pairs_steps_per_second": 3.072, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_openbookqa_pairs_loss": 0.6710968613624573, + "eval_openbookqa_pairs_runtime": 0.5776, + "eval_openbookqa_pairs_samples_per_second": 221.625, + "eval_openbookqa_pairs_steps_per_second": 3.463, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_msmarco_pairs_loss": 0.14522777497768402, + "eval_msmarco_pairs_runtime": 1.4981, + "eval_msmarco_pairs_samples_per_second": 85.441, + "eval_msmarco_pairs_steps_per_second": 1.335, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_nq_pairs_loss": 0.10225611180067062, + "eval_nq_pairs_runtime": 2.3595, + "eval_nq_pairs_samples_per_second": 54.248, + "eval_nq_pairs_steps_per_second": 0.848, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_trivia_pairs_loss": 0.5312957167625427, + "eval_trivia_pairs_runtime": 3.5813, + "eval_trivia_pairs_samples_per_second": 35.741, + "eval_trivia_pairs_steps_per_second": 0.558, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_gooaq_pairs_loss": 0.27713337540626526, + "eval_gooaq_pairs_runtime": 0.9166, + "eval_gooaq_pairs_samples_per_second": 139.645, + "eval_gooaq_pairs_steps_per_second": 2.182, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_paws-pos_loss": 0.024326296523213387, + "eval_paws-pos_runtime": 0.6893, + "eval_paws-pos_samples_per_second": 185.682, + "eval_paws-pos_steps_per_second": 2.901, + "step": 360 + }, + { + "epoch": 0.8112359550561797, + "grad_norm": 4.372533798217773, + "learning_rate": 3.067870346183096e-05, + "loss": 0.7533, + "step": 361 + }, + { + "epoch": 0.8134831460674158, + "grad_norm": 2.6585452556610107, + "learning_rate": 3.063397641144216e-05, + "loss": 0.2958, + "step": 362 + }, + { + "epoch": 0.8157303370786517, + "grad_norm": 4.378647327423096, + "learning_rate": 3.058906179914962e-05, + "loss": 0.8296, + "step": 363 + }, + { + "epoch": 0.8179775280898877, + "grad_norm": 3.1601309776306152, + "learning_rate": 3.0543960499386694e-05, + "loss": 0.3191, + "step": 364 + }, + { + "epoch": 0.8202247191011236, + "grad_norm": 3.446498394012451, + "learning_rate": 3.049867339022129e-05, + "loss": 0.7866, + "step": 365 + }, + { + "epoch": 0.8224719101123595, + "grad_norm": 3.0058486461639404, + "learning_rate": 3.0453201353338826e-05, + "loss": 0.3157, + "step": 366 + }, + { + "epoch": 0.8247191011235955, + "grad_norm": 4.380611419677734, + "learning_rate": 3.040754527402502e-05, + "loss": 0.7402, + "step": 367 + }, + { + "epoch": 0.8269662921348314, + "grad_norm": 3.8081209659576416, + "learning_rate": 3.036170604114869e-05, + "loss": 0.4957, + "step": 368 + }, + { + "epoch": 0.8292134831460675, + "grad_norm": 4.2056989669799805, + "learning_rate": 3.031568454714442e-05, + "loss": 0.8505, + "step": 369 + }, + { + "epoch": 0.8314606741573034, + "grad_norm": 3.101804733276367, + "learning_rate": 3.0269481687995207e-05, + "loss": 0.7702, + "step": 370 + }, + { + "epoch": 0.8337078651685393, + "grad_norm": 4.0704345703125, + "learning_rate": 3.0223098363215002e-05, + "loss": 0.7591, + "step": 371 + }, + { + "epoch": 0.8359550561797753, + "grad_norm": 2.9631364345550537, + "learning_rate": 3.0176535475831208e-05, + "loss": 0.727, + "step": 372 + }, + { + "epoch": 0.8382022471910112, + "grad_norm": 3.3760929107666016, + "learning_rate": 3.01297939323671e-05, + "loss": 0.3233, + "step": 373 + }, + { + "epoch": 0.8404494382022472, + "grad_norm": 4.116260051727295, + "learning_rate": 3.0082874642824164e-05, + "loss": 0.8738, + "step": 374 + }, + { + "epoch": 0.8426966292134831, + "grad_norm": 0.40298929810523987, + "learning_rate": 3.0035778520664388e-05, + "loss": 0.0393, + "step": 375 + }, + { + "epoch": 0.8449438202247191, + "grad_norm": 3.0647614002227783, + "learning_rate": 2.9988506482792485e-05, + "loss": 0.7454, + "step": 376 + }, + { + "epoch": 0.8471910112359551, + "grad_norm": 2.951953649520874, + "learning_rate": 2.994105944953803e-05, + "loss": 0.8297, + "step": 377 + }, + { + "epoch": 0.849438202247191, + "grad_norm": 4.049951553344727, + "learning_rate": 2.9893438344637538e-05, + "loss": 0.7802, + "step": 378 + }, + { + "epoch": 0.851685393258427, + "grad_norm": 3.7383949756622314, + "learning_rate": 2.984564409521651e-05, + "loss": 0.6229, + "step": 379 + }, + { + "epoch": 0.8539325842696629, + "grad_norm": 0.0, + "learning_rate": 2.979767763177134e-05, + "loss": 0.0, + "step": 380 + }, + { + "epoch": 0.8561797752808988, + "grad_norm": 3.399641513824463, + "learning_rate": 2.9749539888151244e-05, + "loss": 0.3506, + "step": 381 + }, + { + "epoch": 0.8584269662921349, + "grad_norm": 0.48723292350769043, + "learning_rate": 2.9701231801540032e-05, + "loss": 0.041, + "step": 382 + }, + { + "epoch": 0.8606741573033708, + "grad_norm": 3.1171765327453613, + "learning_rate": 2.9652754312437897e-05, + "loss": 0.725, + "step": 383 + }, + { + "epoch": 0.8629213483146068, + "grad_norm": 2.6491808891296387, + "learning_rate": 2.9604108364643112e-05, + "loss": 0.257, + "step": 384 + }, + { + "epoch": 0.8651685393258427, + "grad_norm": 4.025605201721191, + "learning_rate": 2.9555294905233606e-05, + "loss": 0.7912, + "step": 385 + }, + { + "epoch": 0.8674157303370786, + "grad_norm": 4.142299652099609, + "learning_rate": 2.9506314884548583e-05, + "loss": 0.8915, + "step": 386 + }, + { + "epoch": 0.8696629213483146, + "grad_norm": 2.943582534790039, + "learning_rate": 2.945716925616998e-05, + "loss": 0.779, + "step": 387 + }, + { + "epoch": 0.8719101123595505, + "grad_norm": 4.478114604949951, + "learning_rate": 2.9407858976903913e-05, + "loss": 0.7828, + "step": 388 + }, + { + "epoch": 0.8741573033707866, + "grad_norm": 3.9878995418548584, + "learning_rate": 2.935838500676207e-05, + "loss": 0.7462, + "step": 389 + }, + { + "epoch": 0.8764044943820225, + "grad_norm": 3.7733311653137207, + "learning_rate": 2.9308748308942983e-05, + "loss": 0.7913, + "step": 390 + }, + { + "epoch": 0.8786516853932584, + "grad_norm": 3.179732322692871, + "learning_rate": 2.9258949849813315e-05, + "loss": 0.3209, + "step": 391 + }, + { + "epoch": 0.8808988764044944, + "grad_norm": 3.6665351390838623, + "learning_rate": 2.9208990598889008e-05, + "loss": 0.5932, + "step": 392 + }, + { + "epoch": 0.8831460674157303, + "grad_norm": 0.545093834400177, + "learning_rate": 2.9158871528816442e-05, + "loss": 0.0613, + "step": 393 + }, + { + "epoch": 0.8853932584269663, + "grad_norm": 5.226474285125732, + "learning_rate": 2.9108593615353467e-05, + "loss": 0.8802, + "step": 394 + }, + { + "epoch": 0.8876404494382022, + "grad_norm": 3.691817283630371, + "learning_rate": 2.9058157837350437e-05, + "loss": 0.6116, + "step": 395 + }, + { + "epoch": 0.8898876404494382, + "grad_norm": 0.4754512906074524, + "learning_rate": 2.900756517673113e-05, + "loss": 0.0537, + "step": 396 + }, + { + "epoch": 0.8921348314606742, + "grad_norm": 2.874117374420166, + "learning_rate": 2.8956816618473647e-05, + "loss": 0.3006, + "step": 397 + }, + { + "epoch": 0.8943820224719101, + "grad_norm": 3.8957912921905518, + "learning_rate": 2.890591315059121e-05, + "loss": 0.7636, + "step": 398 + }, + { + "epoch": 0.8966292134831461, + "grad_norm": 3.7385432720184326, + "learning_rate": 2.8854855764112973e-05, + "loss": 0.612, + "step": 399 + }, + { + "epoch": 0.898876404494382, + "grad_norm": 3.7403082847595215, + "learning_rate": 2.880364545306468e-05, + "loss": 0.54, + "step": 400 + }, + { + "epoch": 0.9011235955056179, + "grad_norm": 2.7360849380493164, + "learning_rate": 2.8752283214449328e-05, + "loss": 0.2761, + "step": 401 + }, + { + "epoch": 0.903370786516854, + "grad_norm": 8.988025665283203, + "learning_rate": 2.8700770048227775e-05, + "loss": 1.2668, + "step": 402 + }, + { + "epoch": 0.9056179775280899, + "grad_norm": 3.411295175552368, + "learning_rate": 2.864910695729925e-05, + "loss": 0.8066, + "step": 403 + }, + { + "epoch": 0.9078651685393259, + "grad_norm": 0.3018481135368347, + "learning_rate": 2.8597294947481834e-05, + "loss": 0.0094, + "step": 404 + }, + { + "epoch": 0.9101123595505618, + "grad_norm": 4.116438388824463, + "learning_rate": 2.8545335027492885e-05, + "loss": 0.673, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.7188639044761658, + "eval_VitaminC_cosine_ap": 0.5516905675485202, + "eval_VitaminC_cosine_f1": 0.6675712347354138, + "eval_VitaminC_cosine_f1_threshold": 0.42514583468437195, + "eval_VitaminC_cosine_precision": 0.5061728395061729, + "eval_VitaminC_cosine_recall": 0.9800796812749004, + "eval_VitaminC_dot_accuracy": 0.548828125, + "eval_VitaminC_dot_accuracy_threshold": 320.3775329589844, + "eval_VitaminC_dot_ap": 0.5343066680873013, + "eval_VitaminC_dot_f1": 0.6720867208672087, + "eval_VitaminC_dot_f1_threshold": 152.709716796875, + "eval_VitaminC_dot_precision": 0.5092402464065708, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 15.12228775024414, + "eval_VitaminC_euclidean_ap": 0.5542894540784595, + "eval_VitaminC_euclidean_f1": 0.6640211640211641, + "eval_VitaminC_euclidean_f1_threshold": 24.3716983795166, + "eval_VitaminC_euclidean_precision": 0.497029702970297, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.55859375, + "eval_VitaminC_manhattan_accuracy_threshold": 305.93597412109375, + "eval_VitaminC_manhattan_ap": 0.5533328154567183, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 509.4247741699219, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 320.3775329589844, + "eval_VitaminC_max_ap": 0.5542894540784595, + "eval_VitaminC_max_f1": 0.6720867208672087, + "eval_VitaminC_max_f1_threshold": 509.4247741699219, + "eval_VitaminC_max_precision": 0.5092402464065708, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5542894540784595, + "eval_sts-test_pearson_cosine": 0.8820726638294588, + "eval_sts-test_pearson_dot": 0.8723940521896922, + "eval_sts-test_pearson_euclidean": 0.9038814103150634, + "eval_sts-test_pearson_manhattan": 0.904449390563823, + "eval_sts-test_pearson_max": 0.904449390563823, + "eval_sts-test_spearman_cosine": 0.9051641183600871, + "eval_sts-test_spearman_dot": 0.8721959088443044, + "eval_sts-test_spearman_euclidean": 0.8999642007914521, + "eval_sts-test_spearman_manhattan": 0.9005904051921018, + "eval_sts-test_spearman_max": 0.9051641183600871, + "eval_vitaminc-pairs_loss": 1.48486328125, + "eval_vitaminc-pairs_runtime": 1.8874, + "eval_vitaminc-pairs_samples_per_second": 57.222, + "eval_vitaminc-pairs_steps_per_second": 1.06, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_negation-triplets_loss": 0.9023827314376831, + "eval_negation-triplets_runtime": 0.302, + "eval_negation-triplets_samples_per_second": 211.927, + "eval_negation-triplets_steps_per_second": 3.311, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_scitail-pairs-pos_loss": 0.10495099425315857, + "eval_scitail-pairs-pos_runtime": 0.3856, + "eval_scitail-pairs-pos_samples_per_second": 140.031, + "eval_scitail-pairs-pos_steps_per_second": 2.593, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_scitail-pairs-qa_loss": 0.0008332311408594251, + "eval_scitail-pairs-qa_runtime": 0.5224, + "eval_scitail-pairs-qa_samples_per_second": 245.005, + "eval_scitail-pairs-qa_steps_per_second": 3.828, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_xsum-pairs_loss": 0.028531953692436218, + "eval_xsum-pairs_runtime": 2.7425, + "eval_xsum-pairs_samples_per_second": 46.672, + "eval_xsum-pairs_steps_per_second": 0.729, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_sciq_pairs_loss": 0.015175853855907917, + "eval_sciq_pairs_runtime": 2.8294, + "eval_sciq_pairs_samples_per_second": 45.239, + "eval_sciq_pairs_steps_per_second": 0.707, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_qasc_pairs_loss": 0.09416583180427551, + "eval_qasc_pairs_runtime": 0.6538, + "eval_qasc_pairs_samples_per_second": 195.781, + "eval_qasc_pairs_steps_per_second": 3.059, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_openbookqa_pairs_loss": 0.715216875076294, + "eval_openbookqa_pairs_runtime": 0.578, + "eval_openbookqa_pairs_samples_per_second": 221.449, + "eval_openbookqa_pairs_steps_per_second": 3.46, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_msmarco_pairs_loss": 0.1417744755744934, + "eval_msmarco_pairs_runtime": 1.4882, + "eval_msmarco_pairs_samples_per_second": 86.012, + "eval_msmarco_pairs_steps_per_second": 1.344, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_nq_pairs_loss": 0.10870223492383957, + "eval_nq_pairs_runtime": 2.3451, + "eval_nq_pairs_samples_per_second": 54.583, + "eval_nq_pairs_steps_per_second": 0.853, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_trivia_pairs_loss": 0.49194595217704773, + "eval_trivia_pairs_runtime": 3.5796, + "eval_trivia_pairs_samples_per_second": 35.759, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_gooaq_pairs_loss": 0.2616226375102997, + "eval_gooaq_pairs_runtime": 0.9137, + "eval_gooaq_pairs_samples_per_second": 140.093, + "eval_gooaq_pairs_steps_per_second": 2.189, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_paws-pos_loss": 0.02422034554183483, + "eval_paws-pos_runtime": 0.6895, + "eval_paws-pos_samples_per_second": 185.641, + "eval_paws-pos_steps_per_second": 2.901, + "step": 405 + }, + { + "epoch": 0.9123595505617977, + "grad_norm": 3.427104949951172, + "learning_rate": 2.8493228208929387e-05, + "loss": 0.5189, + "step": 406 + }, + { + "epoch": 0.9146067415730337, + "grad_norm": 4.941195487976074, + "learning_rate": 2.8440975506248268e-05, + "loss": 0.649, + "step": 407 + }, + { + "epoch": 0.9168539325842696, + "grad_norm": 2.7992403507232666, + "learning_rate": 2.8388577936746633e-05, + "loss": 0.2982, + "step": 408 + }, + { + "epoch": 0.9191011235955057, + "grad_norm": 3.8877484798431396, + "learning_rate": 2.833603652054199e-05, + "loss": 0.7511, + "step": 409 + }, + { + "epoch": 0.9213483146067416, + "grad_norm": 3.2458090782165527, + "learning_rate": 2.8283352280552348e-05, + "loss": 0.5164, + "step": 410 + }, + { + "epoch": 0.9235955056179775, + "grad_norm": 3.7385945320129395, + "learning_rate": 2.8230526242476332e-05, + "loss": 0.5924, + "step": 411 + }, + { + "epoch": 0.9258426966292135, + "grad_norm": 4.369627952575684, + "learning_rate": 2.8177559434773203e-05, + "loss": 0.8191, + "step": 412 + }, + { + "epoch": 0.9280898876404494, + "grad_norm": 2.95206356048584, + "learning_rate": 2.8124452888642838e-05, + "loss": 0.2311, + "step": 413 + }, + { + "epoch": 0.9303370786516854, + "grad_norm": 3.984375238418579, + "learning_rate": 2.8071207638005662e-05, + "loss": 0.7421, + "step": 414 + }, + { + "epoch": 0.9325842696629213, + "grad_norm": 3.0188541412353516, + "learning_rate": 2.801782471948248e-05, + "loss": 0.2936, + "step": 415 + }, + { + "epoch": 0.9348314606741573, + "grad_norm": 4.104308605194092, + "learning_rate": 2.7964305172374362e-05, + "loss": 0.737, + "step": 416 + }, + { + "epoch": 0.9370786516853933, + "grad_norm": 3.686523675918579, + "learning_rate": 2.791065003864235e-05, + "loss": 0.6539, + "step": 417 + }, + { + "epoch": 0.9393258426966292, + "grad_norm": 3.839590311050415, + "learning_rate": 2.785686036288719e-05, + "loss": 0.6855, + "step": 418 + }, + { + "epoch": 0.9415730337078652, + "grad_norm": 4.174718856811523, + "learning_rate": 2.780293719232902e-05, + "loss": 0.8134, + "step": 419 + }, + { + "epoch": 0.9438202247191011, + "grad_norm": 4.046380043029785, + "learning_rate": 2.7748881576786946e-05, + "loss": 0.6885, + "step": 420 + }, + { + "epoch": 0.946067415730337, + "grad_norm": 3.4202940464019775, + "learning_rate": 2.7694694568658613e-05, + "loss": 0.5581, + "step": 421 + }, + { + "epoch": 0.9483146067415731, + "grad_norm": 3.787081718444824, + "learning_rate": 2.764037722289973e-05, + "loss": 0.8029, + "step": 422 + }, + { + "epoch": 0.950561797752809, + "grad_norm": 3.870718240737915, + "learning_rate": 2.7585930597003524e-05, + "loss": 0.8126, + "step": 423 + }, + { + "epoch": 0.952808988764045, + "grad_norm": 3.1959424018859863, + "learning_rate": 2.753135575098015e-05, + "loss": 0.8425, + "step": 424 + }, + { + "epoch": 0.9550561797752809, + "grad_norm": 0.4186573922634125, + "learning_rate": 2.7476653747336047e-05, + "loss": 0.049, + "step": 425 + }, + { + "epoch": 0.9573033707865168, + "grad_norm": 4.299917697906494, + "learning_rate": 2.7421825651053265e-05, + "loss": 0.7849, + "step": 426 + }, + { + "epoch": 0.9595505617977528, + "grad_norm": 2.6435227394104004, + "learning_rate": 2.736687252956873e-05, + "loss": 0.068, + "step": 427 + }, + { + "epoch": 0.9617977528089887, + "grad_norm": 2.717653274536133, + "learning_rate": 2.7311795452753443e-05, + "loss": 0.2925, + "step": 428 + }, + { + "epoch": 0.9640449438202248, + "grad_norm": 3.6929807662963867, + "learning_rate": 2.7256595492891683e-05, + "loss": 0.777, + "step": 429 + }, + { + "epoch": 0.9662921348314607, + "grad_norm": 2.8760790824890137, + "learning_rate": 2.720127372466011e-05, + "loss": 0.7397, + "step": 430 + }, + { + "epoch": 0.9685393258426966, + "grad_norm": 0.03685740381479263, + "learning_rate": 2.714583122510683e-05, + "loss": 0.0007, + "step": 431 + }, + { + "epoch": 0.9707865168539326, + "grad_norm": 4.058692455291748, + "learning_rate": 2.709026907363047e-05, + "loss": 0.8535, + "step": 432 + }, + { + "epoch": 0.9730337078651685, + "grad_norm": 4.2914276123046875, + "learning_rate": 2.703458835195911e-05, + "loss": 0.7026, + "step": 433 + }, + { + "epoch": 0.9752808988764045, + "grad_norm": 3.735518217086792, + "learning_rate": 2.6978790144129262e-05, + "loss": 0.7557, + "step": 434 + }, + { + "epoch": 0.9775280898876404, + "grad_norm": 4.058504104614258, + "learning_rate": 2.6922875536464747e-05, + "loss": 0.7225, + "step": 435 + }, + { + "epoch": 0.9797752808988764, + "grad_norm": 0.0, + "learning_rate": 2.6866845617555555e-05, + "loss": 0.0, + "step": 436 + }, + { + "epoch": 0.9820224719101124, + "grad_norm": 5.648872375488281, + "learning_rate": 2.6810701478236642e-05, + "loss": 0.4131, + "step": 437 + }, + { + "epoch": 0.9842696629213483, + "grad_norm": 2.7032744884490967, + "learning_rate": 2.6754444211566702e-05, + "loss": 0.2824, + "step": 438 + }, + { + "epoch": 0.9865168539325843, + "grad_norm": 3.150801420211792, + "learning_rate": 2.6698074912806882e-05, + "loss": 0.3144, + "step": 439 + }, + { + "epoch": 0.9887640449438202, + "grad_norm": 2.3572490215301514, + "learning_rate": 2.6641594679399448e-05, + "loss": 0.0509, + "step": 440 + }, + { + "epoch": 0.9910112359550561, + "grad_norm": 3.2544448375701904, + "learning_rate": 2.6585004610946452e-05, + "loss": 0.7645, + "step": 441 + }, + { + "epoch": 0.9932584269662922, + "grad_norm": 4.310440540313721, + "learning_rate": 2.6528305809188273e-05, + "loss": 0.2787, + "step": 442 + }, + { + "epoch": 0.9955056179775281, + "grad_norm": 3.863487482070923, + "learning_rate": 2.6471499377982225e-05, + "loss": 0.64, + "step": 443 + }, + { + "epoch": 0.9977528089887641, + "grad_norm": 6.1020612716674805, + "learning_rate": 2.6414586423281017e-05, + "loss": 0.4045, + "step": 444 + }, + { + "epoch": 1.0, + "grad_norm": 3.1245224475860596, + "learning_rate": 2.6357568053111255e-05, + "loss": 0.7661, + "step": 445 + }, + { + "epoch": 1.002247191011236, + "grad_norm": 3.7888576984405518, + "learning_rate": 2.6300445377551847e-05, + "loss": 0.7335, + "step": 446 + }, + { + "epoch": 1.0044943820224719, + "grad_norm": 3.935758590698242, + "learning_rate": 2.62432195087124e-05, + "loss": 0.7835, + "step": 447 + }, + { + "epoch": 1.006741573033708, + "grad_norm": 3.7737417221069336, + "learning_rate": 2.6185891560711587e-05, + "loss": 0.7674, + "step": 448 + }, + { + "epoch": 1.0089887640449438, + "grad_norm": 0.457439124584198, + "learning_rate": 2.612846264965542e-05, + "loss": 0.0489, + "step": 449 + }, + { + "epoch": 1.0112359550561798, + "grad_norm": 3.515545606613159, + "learning_rate": 2.607093389361555e-05, + "loss": 0.3104, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_VitaminC_cosine_accuracy": 0.5625, + "eval_VitaminC_cosine_accuracy_threshold": 0.7603898048400879, + "eval_VitaminC_cosine_ap": 0.5525005100698708, + "eval_VitaminC_cosine_f1": 0.6685006877579092, + "eval_VitaminC_cosine_f1_threshold": 0.4857867360115051, + "eval_VitaminC_cosine_precision": 0.5105042016806722, + "eval_VitaminC_cosine_recall": 0.9681274900398407, + "eval_VitaminC_dot_accuracy": 0.548828125, + "eval_VitaminC_dot_accuracy_threshold": 325.483154296875, + "eval_VitaminC_dot_ap": 0.5344057014880635, + "eval_VitaminC_dot_f1": 0.6675749318801091, + "eval_VitaminC_dot_f1_threshold": 159.2823028564453, + "eval_VitaminC_dot_precision": 0.5072463768115942, + "eval_VitaminC_dot_recall": 0.9760956175298805, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 15.577638626098633, + "eval_VitaminC_euclidean_ap": 0.5540831040718627, + "eval_VitaminC_euclidean_f1": 0.6666666666666667, + "eval_VitaminC_euclidean_f1_threshold": 21.39883804321289, + "eval_VitaminC_euclidean_precision": 0.5030425963488844, + "eval_VitaminC_euclidean_recall": 0.9880478087649402, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 241.35984802246094, + "eval_VitaminC_manhattan_ap": 0.5536965508228381, + "eval_VitaminC_manhattan_f1": 0.6657754010695187, + "eval_VitaminC_manhattan_f1_threshold": 463.26080322265625, + "eval_VitaminC_manhattan_precision": 0.5010060362173038, + "eval_VitaminC_manhattan_recall": 0.9920318725099602, + "eval_VitaminC_max_accuracy": 0.5625, + "eval_VitaminC_max_accuracy_threshold": 325.483154296875, + "eval_VitaminC_max_ap": 0.5540831040718627, + "eval_VitaminC_max_f1": 0.6685006877579092, + "eval_VitaminC_max_f1_threshold": 463.26080322265625, + "eval_VitaminC_max_precision": 0.5105042016806722, + "eval_VitaminC_max_recall": 0.9920318725099602, + "eval_sequential_score": 0.5540831040718627, + "eval_sts-test_pearson_cosine": 0.8812335915964673, + "eval_sts-test_pearson_dot": 0.8749461926810898, + "eval_sts-test_pearson_euclidean": 0.9037170548962163, + "eval_sts-test_pearson_manhattan": 0.9046002457312785, + "eval_sts-test_pearson_max": 0.9046002457312785, + "eval_sts-test_spearman_cosine": 0.9043227946459288, + "eval_sts-test_spearman_dot": 0.8763633253101171, + "eval_sts-test_spearman_euclidean": 0.8995340964182194, + "eval_sts-test_spearman_manhattan": 0.9002530254324721, + "eval_sts-test_spearman_max": 0.9043227946459288, + "eval_vitaminc-pairs_loss": 1.4284634590148926, + "eval_vitaminc-pairs_runtime": 1.9114, + "eval_vitaminc-pairs_samples_per_second": 56.503, + "eval_vitaminc-pairs_steps_per_second": 1.046, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_negation-triplets_loss": 0.8765377402305603, + "eval_negation-triplets_runtime": 0.3186, + "eval_negation-triplets_samples_per_second": 200.858, + "eval_negation-triplets_steps_per_second": 3.138, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_scitail-pairs-pos_loss": 0.06986676901578903, + "eval_scitail-pairs-pos_runtime": 0.4519, + "eval_scitail-pairs-pos_samples_per_second": 119.485, + "eval_scitail-pairs-pos_steps_per_second": 2.213, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_scitail-pairs-qa_loss": 0.0008211968233808875, + "eval_scitail-pairs-qa_runtime": 0.5761, + "eval_scitail-pairs-qa_samples_per_second": 222.193, + "eval_scitail-pairs-qa_steps_per_second": 3.472, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_xsum-pairs_loss": 0.028749318793416023, + "eval_xsum-pairs_runtime": 2.7556, + "eval_xsum-pairs_samples_per_second": 46.45, + "eval_xsum-pairs_steps_per_second": 0.726, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_sciq_pairs_loss": 0.01784924976527691, + "eval_sciq_pairs_runtime": 2.8996, + "eval_sciq_pairs_samples_per_second": 44.143, + "eval_sciq_pairs_steps_per_second": 0.69, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_qasc_pairs_loss": 0.09589868038892746, + "eval_qasc_pairs_runtime": 0.6801, + "eval_qasc_pairs_samples_per_second": 188.207, + "eval_qasc_pairs_steps_per_second": 2.941, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_openbookqa_pairs_loss": 0.7216827273368835, + "eval_openbookqa_pairs_runtime": 0.5958, + "eval_openbookqa_pairs_samples_per_second": 214.846, + "eval_openbookqa_pairs_steps_per_second": 3.357, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_msmarco_pairs_loss": 0.15124906599521637, + "eval_msmarco_pairs_runtime": 1.5017, + "eval_msmarco_pairs_samples_per_second": 85.239, + "eval_msmarco_pairs_steps_per_second": 1.332, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_nq_pairs_loss": 0.10319234430789948, + "eval_nq_pairs_runtime": 2.3696, + "eval_nq_pairs_samples_per_second": 54.018, + "eval_nq_pairs_steps_per_second": 0.844, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_trivia_pairs_loss": 0.48776012659072876, + "eval_trivia_pairs_runtime": 3.5941, + "eval_trivia_pairs_samples_per_second": 35.614, + "eval_trivia_pairs_steps_per_second": 0.556, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_gooaq_pairs_loss": 0.26884058117866516, + "eval_gooaq_pairs_runtime": 0.9231, + "eval_gooaq_pairs_samples_per_second": 138.665, + "eval_gooaq_pairs_steps_per_second": 2.167, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_paws-pos_loss": 0.02506968565285206, + "eval_paws-pos_runtime": 0.7124, + "eval_paws-pos_samples_per_second": 179.665, + "eval_paws-pos_steps_per_second": 2.807, + "step": 450 + }, + { + "epoch": 1.0134831460674156, + "grad_norm": 3.3714258670806885, + "learning_rate": 2.6013306412607486e-05, + "loss": 0.2977, + "step": 451 + }, + { + "epoch": 1.0157303370786517, + "grad_norm": 3.0152552127838135, + "learning_rate": 2.59555813285688e-05, + "loss": 0.7256, + "step": 452 + }, + { + "epoch": 1.0179775280898877, + "grad_norm": 0.36391642689704895, + "learning_rate": 2.589775976533726e-05, + "loss": 0.0327, + "step": 453 + }, + { + "epoch": 1.0202247191011236, + "grad_norm": 2.959923505783081, + "learning_rate": 2.5839842848628985e-05, + "loss": 0.7372, + "step": 454 + }, + { + "epoch": 1.0224719101123596, + "grad_norm": 0.8520237803459167, + "learning_rate": 2.5781831706016506e-05, + "loss": 0.0518, + "step": 455 + }, + { + "epoch": 1.0247191011235954, + "grad_norm": 3.161862850189209, + "learning_rate": 2.5723727466906813e-05, + "loss": 0.7668, + "step": 456 + }, + { + "epoch": 1.0269662921348315, + "grad_norm": 4.314478397369385, + "learning_rate": 2.5665531262519385e-05, + "loss": 0.6634, + "step": 457 + }, + { + "epoch": 1.0292134831460673, + "grad_norm": 4.253237247467041, + "learning_rate": 2.5607244225864135e-05, + "loss": 0.6022, + "step": 458 + }, + { + "epoch": 1.0314606741573034, + "grad_norm": 3.9452831745147705, + "learning_rate": 2.5548867491719395e-05, + "loss": 0.7255, + "step": 459 + }, + { + "epoch": 1.0337078651685394, + "grad_norm": 2.962646007537842, + "learning_rate": 2.549040219660978e-05, + "loss": 0.2823, + "step": 460 + }, + { + "epoch": 1.0359550561797752, + "grad_norm": 2.687352418899536, + "learning_rate": 2.543184947878408e-05, + "loss": 0.2614, + "step": 461 + }, + { + "epoch": 1.0382022471910113, + "grad_norm": 2.9925472736358643, + "learning_rate": 2.5373210478193118e-05, + "loss": 0.5231, + "step": 462 + }, + { + "epoch": 1.0404494382022471, + "grad_norm": 0.694948136806488, + "learning_rate": 2.5314486336467516e-05, + "loss": 0.0424, + "step": 463 + }, + { + "epoch": 1.0426966292134832, + "grad_norm": 5.543944835662842, + "learning_rate": 2.525567819689551e-05, + "loss": 0.9838, + "step": 464 + }, + { + "epoch": 1.0449438202247192, + "grad_norm": 6.339949607849121, + "learning_rate": 2.5196787204400655e-05, + "loss": 2.4683, + "step": 465 + }, + { + "epoch": 1.047191011235955, + "grad_norm": 0.48354002833366394, + "learning_rate": 2.5137814505519576e-05, + "loss": 0.0497, + "step": 466 + }, + { + "epoch": 1.049438202247191, + "grad_norm": 0.5280358791351318, + "learning_rate": 2.5078761248379596e-05, + "loss": 0.0766, + "step": 467 + }, + { + "epoch": 1.051685393258427, + "grad_norm": 3.0178725719451904, + "learning_rate": 2.5019628582676428e-05, + "loss": 0.7333, + "step": 468 + }, + { + "epoch": 1.053932584269663, + "grad_norm": 3.958301305770874, + "learning_rate": 2.4960417659651765e-05, + "loss": 0.7881, + "step": 469 + }, + { + "epoch": 1.0561797752808988, + "grad_norm": 3.143247127532959, + "learning_rate": 2.4901129632070887e-05, + "loss": 0.7611, + "step": 470 + }, + { + "epoch": 1.0584269662921348, + "grad_norm": 3.9136295318603516, + "learning_rate": 2.48417656542002e-05, + "loss": 0.6023, + "step": 471 + }, + { + "epoch": 1.060674157303371, + "grad_norm": 3.149609327316284, + "learning_rate": 2.4782326881784757e-05, + "loss": 0.7884, + "step": 472 + }, + { + "epoch": 1.0629213483146067, + "grad_norm": 4.164999961853027, + "learning_rate": 2.4722814472025798e-05, + "loss": 0.8465, + "step": 473 + }, + { + "epoch": 1.0651685393258428, + "grad_norm": 2.767392158508301, + "learning_rate": 2.466322958355817e-05, + "loss": 0.2752, + "step": 474 + }, + { + "epoch": 1.0674157303370786, + "grad_norm": 2.7601747512817383, + "learning_rate": 2.4603573376427804e-05, + "loss": 0.2648, + "step": 475 + }, + { + "epoch": 1.0696629213483146, + "grad_norm": 3.3139634132385254, + "learning_rate": 2.4543847012069114e-05, + "loss": 0.5548, + "step": 476 + }, + { + "epoch": 1.0719101123595505, + "grad_norm": 0.46029964089393616, + "learning_rate": 2.4484051653282405e-05, + "loss": 0.0554, + "step": 477 + }, + { + "epoch": 1.0741573033707865, + "grad_norm": 3.19266939163208, + "learning_rate": 2.44241884642112e-05, + "loss": 0.8244, + "step": 478 + }, + { + "epoch": 1.0764044943820226, + "grad_norm": 0.49586713314056396, + "learning_rate": 2.4364258610319604e-05, + "loss": 0.0369, + "step": 479 + }, + { + "epoch": 1.0786516853932584, + "grad_norm": 3.8522789478302, + "learning_rate": 2.4304263258369612e-05, + "loss": 0.747, + "step": 480 + }, + { + "epoch": 1.0808988764044944, + "grad_norm": 2.929213285446167, + "learning_rate": 2.4244203576398378e-05, + "loss": 0.2507, + "step": 481 + }, + { + "epoch": 1.0831460674157303, + "grad_norm": 0.4208325147628784, + "learning_rate": 2.418408073369549e-05, + "loss": 0.0304, + "step": 482 + }, + { + "epoch": 1.0853932584269663, + "grad_norm": 3.958451271057129, + "learning_rate": 2.4123895900780194e-05, + "loss": 0.7735, + "step": 483 + }, + { + "epoch": 1.0876404494382022, + "grad_norm": 3.8647449016571045, + "learning_rate": 2.4063650249378617e-05, + "loss": 0.7526, + "step": 484 + }, + { + "epoch": 1.0898876404494382, + "grad_norm": 3.1796231269836426, + "learning_rate": 2.4003344952400947e-05, + "loss": 0.7959, + "step": 485 + }, + { + "epoch": 1.0921348314606742, + "grad_norm": 3.6608550548553467, + "learning_rate": 2.3942981183918597e-05, + "loss": 0.7405, + "step": 486 + }, + { + "epoch": 1.09438202247191, + "grad_norm": 3.589618682861328, + "learning_rate": 2.388256011914134e-05, + "loss": 0.7041, + "step": 487 + }, + { + "epoch": 1.0966292134831461, + "grad_norm": 3.798146963119507, + "learning_rate": 2.382208293439447e-05, + "loss": 0.6991, + "step": 488 + }, + { + "epoch": 1.098876404494382, + "grad_norm": 0.4198363423347473, + "learning_rate": 2.3761550807095828e-05, + "loss": 0.0462, + "step": 489 + }, + { + "epoch": 1.101123595505618, + "grad_norm": 3.7368414402008057, + "learning_rate": 2.3700964915732954e-05, + "loss": 0.5835, + "step": 490 + }, + { + "epoch": 1.1033707865168538, + "grad_norm": 2.6319525241851807, + "learning_rate": 2.364032643984009e-05, + "loss": 0.2632, + "step": 491 + }, + { + "epoch": 1.1056179775280899, + "grad_norm": 3.6233298778533936, + "learning_rate": 2.3579636559975242e-05, + "loss": 0.4681, + "step": 492 + }, + { + "epoch": 1.107865168539326, + "grad_norm": 3.0172781944274902, + "learning_rate": 2.351889645769719e-05, + "loss": 0.7271, + "step": 493 + }, + { + "epoch": 1.1101123595505618, + "grad_norm": 2.4773848056793213, + "learning_rate": 2.3458107315542488e-05, + "loss": 0.2582, + "step": 494 + }, + { + "epoch": 1.1123595505617978, + "grad_norm": 2.33353853225708, + "learning_rate": 2.3397270317002424e-05, + "loss": 0.2251, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_VitaminC_cosine_accuracy": 0.552734375, + "eval_VitaminC_cosine_accuracy_threshold": 0.7386432886123657, + "eval_VitaminC_cosine_ap": 0.5475541126516937, + "eval_VitaminC_cosine_f1": 0.6675639300134589, + "eval_VitaminC_cosine_f1_threshold": 0.36797067523002625, + "eval_VitaminC_cosine_precision": 0.5040650406504065, + "eval_VitaminC_cosine_recall": 0.9880478087649402, + "eval_VitaminC_dot_accuracy": 0.546875, + "eval_VitaminC_dot_accuracy_threshold": 329.7876281738281, + "eval_VitaminC_dot_ap": 0.5321466642848512, + "eval_VitaminC_dot_f1": 0.6684931506849316, + "eval_VitaminC_dot_f1_threshold": 163.3940887451172, + "eval_VitaminC_dot_precision": 0.5093945720250522, + "eval_VitaminC_dot_recall": 0.9721115537848606, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 16.11573028564453, + "eval_VitaminC_euclidean_ap": 0.5516659121082983, + "eval_VitaminC_euclidean_f1": 0.6657789613848203, + "eval_VitaminC_euclidean_f1_threshold": 23.179344177246094, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 0.9960159362549801, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 311.29736328125, + "eval_VitaminC_manhattan_ap": 0.5507056801905115, + "eval_VitaminC_manhattan_f1": 0.6657789613848203, + "eval_VitaminC_manhattan_f1_threshold": 492.5957946777344, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 0.9960159362549801, + "eval_VitaminC_max_accuracy": 0.5546875, + "eval_VitaminC_max_accuracy_threshold": 329.7876281738281, + "eval_VitaminC_max_ap": 0.5516659121082983, + "eval_VitaminC_max_f1": 0.6684931506849316, + "eval_VitaminC_max_f1_threshold": 492.5957946777344, + "eval_VitaminC_max_precision": 0.5093945720250522, + "eval_VitaminC_max_recall": 0.9960159362549801, + "eval_sequential_score": 0.5516659121082983, + "eval_sts-test_pearson_cosine": 0.8841762545397394, + "eval_sts-test_pearson_dot": 0.8767370504598664, + "eval_sts-test_pearson_euclidean": 0.9052591981779188, + "eval_sts-test_pearson_manhattan": 0.9058933866613134, + "eval_sts-test_pearson_max": 0.9058933866613134, + "eval_sts-test_spearman_cosine": 0.9052463711785245, + "eval_sts-test_spearman_dot": 0.8788541357679011, + "eval_sts-test_spearman_euclidean": 0.8998455204462421, + "eval_sts-test_spearman_manhattan": 0.9007272097981753, + "eval_sts-test_spearman_max": 0.9052463711785245, + "eval_vitaminc-pairs_loss": 1.385530948638916, + "eval_vitaminc-pairs_runtime": 1.9019, + "eval_vitaminc-pairs_samples_per_second": 56.786, + "eval_vitaminc-pairs_steps_per_second": 1.052, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_negation-triplets_loss": 0.8651055097579956, + "eval_negation-triplets_runtime": 0.3083, + "eval_negation-triplets_samples_per_second": 207.568, + "eval_negation-triplets_steps_per_second": 3.243, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_scitail-pairs-pos_loss": 0.0760912150144577, + "eval_scitail-pairs-pos_runtime": 0.4061, + "eval_scitail-pairs-pos_samples_per_second": 132.981, + "eval_scitail-pairs-pos_steps_per_second": 2.463, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_scitail-pairs-qa_loss": 0.0005116994143463671, + "eval_scitail-pairs-qa_runtime": 0.5389, + "eval_scitail-pairs-qa_samples_per_second": 237.512, + "eval_scitail-pairs-qa_steps_per_second": 3.711, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_xsum-pairs_loss": 0.02057916484773159, + "eval_xsum-pairs_runtime": 2.7698, + "eval_xsum-pairs_samples_per_second": 46.213, + "eval_xsum-pairs_steps_per_second": 0.722, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_sciq_pairs_loss": 0.015313890762627125, + "eval_sciq_pairs_runtime": 2.8967, + "eval_sciq_pairs_samples_per_second": 44.189, + "eval_sciq_pairs_steps_per_second": 0.69, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_qasc_pairs_loss": 0.10294634103775024, + "eval_qasc_pairs_runtime": 0.6624, + "eval_qasc_pairs_samples_per_second": 193.223, + "eval_qasc_pairs_steps_per_second": 3.019, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_openbookqa_pairs_loss": 0.679233968257904, + "eval_openbookqa_pairs_runtime": 0.5955, + "eval_openbookqa_pairs_samples_per_second": 214.948, + "eval_openbookqa_pairs_steps_per_second": 3.359, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_msmarco_pairs_loss": 0.15479065477848053, + "eval_msmarco_pairs_runtime": 1.498, + "eval_msmarco_pairs_samples_per_second": 85.447, + "eval_msmarco_pairs_steps_per_second": 1.335, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_nq_pairs_loss": 0.0932854488492012, + "eval_nq_pairs_runtime": 2.3621, + "eval_nq_pairs_samples_per_second": 54.188, + "eval_nq_pairs_steps_per_second": 0.847, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_trivia_pairs_loss": 0.5306271910667419, + "eval_trivia_pairs_runtime": 3.5969, + "eval_trivia_pairs_samples_per_second": 35.586, + "eval_trivia_pairs_steps_per_second": 0.556, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_gooaq_pairs_loss": 0.2823023796081543, + "eval_gooaq_pairs_runtime": 0.9247, + "eval_gooaq_pairs_samples_per_second": 138.43, + "eval_gooaq_pairs_steps_per_second": 2.163, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_paws-pos_loss": 0.02393445000052452, + "eval_paws-pos_runtime": 0.7045, + "eval_paws-pos_samples_per_second": 181.692, + "eval_paws-pos_steps_per_second": 2.839, + "step": 495 + }, + { + "epoch": 1.1146067415730336, + "grad_norm": 0.5595234632492065, + "learning_rate": 2.3336386646500005e-05, + "loss": 0.0385, + "step": 496 + }, + { + "epoch": 1.1168539325842697, + "grad_norm": 3.837158679962158, + "learning_rate": 2.327545748936687e-05, + "loss": 0.7277, + "step": 497 + }, + { + "epoch": 1.1191011235955055, + "grad_norm": 3.817466974258423, + "learning_rate": 2.3214484031820252e-05, + "loss": 0.705, + "step": 498 + }, + { + "epoch": 1.1213483146067416, + "grad_norm": 3.6540205478668213, + "learning_rate": 2.315346746093984e-05, + "loss": 0.6059, + "step": 499 + }, + { + "epoch": 1.1235955056179776, + "grad_norm": 3.6074440479278564, + "learning_rate": 2.309240896464469e-05, + "loss": 0.6156, + "step": 500 + }, + { + "epoch": 1.1258426966292134, + "grad_norm": 3.6017813682556152, + "learning_rate": 2.30313097316701e-05, + "loss": 0.6809, + "step": 501 + }, + { + "epoch": 1.1280898876404495, + "grad_norm": 2.9375529289245605, + "learning_rate": 2.2970170951544467e-05, + "loss": 0.7104, + "step": 502 + }, + { + "epoch": 1.1303370786516853, + "grad_norm": 2.9063873291015625, + "learning_rate": 2.2908993814566104e-05, + "loss": 0.4397, + "step": 503 + }, + { + "epoch": 1.1325842696629214, + "grad_norm": 4.570181846618652, + "learning_rate": 2.284777951178011e-05, + "loss": 0.6952, + "step": 504 + }, + { + "epoch": 1.1348314606741572, + "grad_norm": 0.4327554702758789, + "learning_rate": 2.2786529234955137e-05, + "loss": 0.0557, + "step": 505 + }, + { + "epoch": 1.1370786516853932, + "grad_norm": 3.457714796066284, + "learning_rate": 2.2725244176560217e-05, + "loss": 0.6711, + "step": 506 + }, + { + "epoch": 1.1393258426966293, + "grad_norm": 3.4728267192840576, + "learning_rate": 2.2663925529741547e-05, + "loss": 0.7173, + "step": 507 + }, + { + "epoch": 1.1415730337078651, + "grad_norm": 3.583503246307373, + "learning_rate": 2.2602574488299232e-05, + "loss": 0.7037, + "step": 508 + }, + { + "epoch": 1.1438202247191012, + "grad_norm": 4.4957661628723145, + "learning_rate": 2.2541192246664077e-05, + "loss": 0.8578, + "step": 509 + }, + { + "epoch": 1.146067415730337, + "grad_norm": 3.447329044342041, + "learning_rate": 2.2479779999874303e-05, + "loss": 0.6712, + "step": 510 + }, + { + "epoch": 1.148314606741573, + "grad_norm": 3.2203116416931152, + "learning_rate": 2.2418338943552296e-05, + "loss": 0.7472, + "step": 511 + }, + { + "epoch": 1.1505617977528089, + "grad_norm": 3.551426410675049, + "learning_rate": 2.235687027388135e-05, + "loss": 0.5911, + "step": 512 + }, + { + "epoch": 1.152808988764045, + "grad_norm": 3.8964977264404297, + "learning_rate": 2.229537518758233e-05, + "loss": 0.6827, + "step": 513 + }, + { + "epoch": 1.155056179775281, + "grad_norm": 3.483255624771118, + "learning_rate": 2.2233854881890425e-05, + "loss": 0.5034, + "step": 514 + }, + { + "epoch": 1.1573033707865168, + "grad_norm": 4.6676740646362305, + "learning_rate": 2.2172310554531788e-05, + "loss": 0.8367, + "step": 515 + }, + { + "epoch": 1.1595505617977528, + "grad_norm": 3.4448702335357666, + "learning_rate": 2.2110743403700276e-05, + "loss": 0.6596, + "step": 516 + }, + { + "epoch": 1.1617977528089887, + "grad_norm": 0.5997368097305298, + "learning_rate": 2.2049154628034062e-05, + "loss": 0.0859, + "step": 517 + }, + { + "epoch": 1.1640449438202247, + "grad_norm": 2.822986602783203, + "learning_rate": 2.1987545426592347e-05, + "loss": 0.2797, + "step": 518 + }, + { + "epoch": 1.1662921348314608, + "grad_norm": 3.156625270843506, + "learning_rate": 2.1925916998832005e-05, + "loss": 0.5181, + "step": 519 + }, + { + "epoch": 1.1685393258426966, + "grad_norm": 3.550964832305908, + "learning_rate": 2.1864270544584192e-05, + "loss": 0.6837, + "step": 520 + }, + { + "epoch": 1.1707865168539326, + "grad_norm": 3.6208648681640625, + "learning_rate": 2.1802607264031045e-05, + "loss": 0.7238, + "step": 521 + }, + { + "epoch": 1.1730337078651685, + "grad_norm": 0.5348507165908813, + "learning_rate": 2.174092835768228e-05, + "loss": 0.0318, + "step": 522 + }, + { + "epoch": 1.1752808988764045, + "grad_norm": 0.4848617911338806, + "learning_rate": 2.167923502635183e-05, + "loss": 0.0694, + "step": 523 + }, + { + "epoch": 1.1775280898876406, + "grad_norm": 3.322484016418457, + "learning_rate": 2.161752847113446e-05, + "loss": 0.7472, + "step": 524 + }, + { + "epoch": 1.1797752808988764, + "grad_norm": 5.0792999267578125, + "learning_rate": 2.1555809893382403e-05, + "loss": 0.8912, + "step": 525 + }, + { + "epoch": 1.1820224719101124, + "grad_norm": 3.029616117477417, + "learning_rate": 2.1494080494681936e-05, + "loss": 0.7744, + "step": 526 + }, + { + "epoch": 1.1842696629213483, + "grad_norm": 3.989457845687866, + "learning_rate": 2.1432341476830015e-05, + "loss": 0.6869, + "step": 527 + }, + { + "epoch": 1.1865168539325843, + "grad_norm": 4.336198806762695, + "learning_rate": 2.137059404181087e-05, + "loss": 0.8497, + "step": 528 + }, + { + "epoch": 1.1887640449438202, + "grad_norm": 2.7506906986236572, + "learning_rate": 2.1308839391772608e-05, + "loss": 0.4281, + "step": 529 + }, + { + "epoch": 1.1910112359550562, + "grad_norm": 3.7945544719696045, + "learning_rate": 2.12470787290038e-05, + "loss": 0.7605, + "step": 530 + }, + { + "epoch": 1.1932584269662923, + "grad_norm": 3.5343854427337646, + "learning_rate": 2.1185313255910074e-05, + "loss": 0.6354, + "step": 531 + }, + { + "epoch": 1.195505617977528, + "grad_norm": 0.35124847292900085, + "learning_rate": 2.1123544174990714e-05, + "loss": 0.0518, + "step": 532 + }, + { + "epoch": 1.1977528089887641, + "grad_norm": 2.397141456604004, + "learning_rate": 2.106177268881524e-05, + "loss": 0.2602, + "step": 533 + }, + { + "epoch": 1.2, + "grad_norm": 3.2060976028442383, + "learning_rate": 2.1e-05, + "loss": 0.5082, + "step": 534 + }, + { + "epoch": 1.202247191011236, + "grad_norm": 0.4915749132633209, + "learning_rate": 2.093822731118476e-05, + "loss": 0.0603, + "step": 535 + }, + { + "epoch": 1.2044943820224718, + "grad_norm": 6.44587516784668, + "learning_rate": 2.087645582500929e-05, + "loss": 2.3371, + "step": 536 + }, + { + "epoch": 1.2067415730337079, + "grad_norm": 2.940648078918457, + "learning_rate": 2.0814686744089924e-05, + "loss": 0.6513, + "step": 537 + }, + { + "epoch": 1.208988764044944, + "grad_norm": 3.5047895908355713, + "learning_rate": 2.0752921270996197e-05, + "loss": 0.6053, + "step": 538 + }, + { + "epoch": 1.2112359550561798, + "grad_norm": 0.4148883819580078, + "learning_rate": 2.069116060822739e-05, + "loss": 0.0544, + "step": 539 + }, + { + "epoch": 1.2134831460674158, + "grad_norm": 3.633577823638916, + "learning_rate": 2.062940595818913e-05, + "loss": 0.7219, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.750012993812561, + "eval_VitaminC_cosine_ap": 0.5498478359791117, + "eval_VitaminC_cosine_f1": 0.6666666666666666, + "eval_VitaminC_cosine_f1_threshold": 0.2890807092189789, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.544921875, + "eval_VitaminC_dot_accuracy_threshold": 325.9202880859375, + "eval_VitaminC_dot_ap": 0.5307476674257613, + "eval_VitaminC_dot_f1": 0.6657824933687002, + "eval_VitaminC_dot_f1_threshold": 100.63825988769531, + "eval_VitaminC_dot_precision": 0.4990059642147117, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 12.168689727783203, + "eval_VitaminC_euclidean_ap": 0.5532762404675531, + "eval_VitaminC_euclidean_f1": 0.6675531914893617, + "eval_VitaminC_euclidean_f1_threshold": 23.49704360961914, + "eval_VitaminC_euclidean_precision": 0.500998003992016, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 251.46685791015625, + "eval_VitaminC_manhattan_ap": 0.5528651894260193, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 511.0567321777344, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 325.9202880859375, + "eval_VitaminC_max_ap": 0.5532762404675531, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 511.0567321777344, + "eval_VitaminC_max_precision": 0.500998003992016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5532762404675531, + "eval_sts-test_pearson_cosine": 0.8836542354588774, + "eval_sts-test_pearson_dot": 0.8766446823118297, + "eval_sts-test_pearson_euclidean": 0.9062930503225336, + "eval_sts-test_pearson_manhattan": 0.9067495755923205, + "eval_sts-test_pearson_max": 0.9067495755923205, + "eval_sts-test_spearman_cosine": 0.9065594179390095, + "eval_sts-test_spearman_dot": 0.8763055514316607, + "eval_sts-test_spearman_euclidean": 0.9012305719863057, + "eval_sts-test_spearman_manhattan": 0.901725878947386, + "eval_sts-test_spearman_max": 0.9065594179390095, + "eval_vitaminc-pairs_loss": 1.377655029296875, + "eval_vitaminc-pairs_runtime": 1.9029, + "eval_vitaminc-pairs_samples_per_second": 56.754, + "eval_vitaminc-pairs_steps_per_second": 1.051, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_negation-triplets_loss": 0.8754605650901794, + "eval_negation-triplets_runtime": 0.3041, + "eval_negation-triplets_samples_per_second": 210.479, + "eval_negation-triplets_steps_per_second": 3.289, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_scitail-pairs-pos_loss": 0.08722448348999023, + "eval_scitail-pairs-pos_runtime": 0.4119, + "eval_scitail-pairs-pos_samples_per_second": 131.115, + "eval_scitail-pairs-pos_steps_per_second": 2.428, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_scitail-pairs-qa_loss": 0.0006766854785382748, + "eval_scitail-pairs-qa_runtime": 0.5432, + "eval_scitail-pairs-qa_samples_per_second": 235.655, + "eval_scitail-pairs-qa_steps_per_second": 3.682, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_xsum-pairs_loss": 0.02525785192847252, + "eval_xsum-pairs_runtime": 2.7625, + "eval_xsum-pairs_samples_per_second": 46.335, + "eval_xsum-pairs_steps_per_second": 0.724, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_sciq_pairs_loss": 0.016477206721901894, + "eval_sciq_pairs_runtime": 2.9071, + "eval_sciq_pairs_samples_per_second": 44.031, + "eval_sciq_pairs_steps_per_second": 0.688, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_qasc_pairs_loss": 0.09754681587219238, + "eval_qasc_pairs_runtime": 0.6665, + "eval_qasc_pairs_samples_per_second": 192.059, + "eval_qasc_pairs_steps_per_second": 3.001, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_openbookqa_pairs_loss": 0.6885332465171814, + "eval_openbookqa_pairs_runtime": 0.5914, + "eval_openbookqa_pairs_samples_per_second": 216.427, + "eval_openbookqa_pairs_steps_per_second": 3.382, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_msmarco_pairs_loss": 0.13402031362056732, + "eval_msmarco_pairs_runtime": 1.5047, + "eval_msmarco_pairs_samples_per_second": 85.067, + "eval_msmarco_pairs_steps_per_second": 1.329, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_nq_pairs_loss": 0.09435093402862549, + "eval_nq_pairs_runtime": 2.366, + "eval_nq_pairs_samples_per_second": 54.101, + "eval_nq_pairs_steps_per_second": 0.845, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_trivia_pairs_loss": 0.5325451493263245, + "eval_trivia_pairs_runtime": 3.6088, + "eval_trivia_pairs_samples_per_second": 35.468, + "eval_trivia_pairs_steps_per_second": 0.554, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_gooaq_pairs_loss": 0.2803599536418915, + "eval_gooaq_pairs_runtime": 0.9204, + "eval_gooaq_pairs_samples_per_second": 139.063, + "eval_gooaq_pairs_steps_per_second": 2.173, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_paws-pos_loss": 0.024230225011706352, + "eval_paws-pos_runtime": 0.7045, + "eval_paws-pos_samples_per_second": 181.679, + "eval_paws-pos_steps_per_second": 2.839, + "step": 540 + }, + { + "epoch": 1.2157303370786516, + "grad_norm": 3.6125593185424805, + "learning_rate": 2.056765852316999e-05, + "loss": 0.6862, + "step": 541 + }, + { + "epoch": 1.2179775280898877, + "grad_norm": 2.6327385902404785, + "learning_rate": 2.0505919505318062e-05, + "loss": 0.4639, + "step": 542 + }, + { + "epoch": 1.2202247191011235, + "grad_norm": 3.4139833450317383, + "learning_rate": 2.0444190106617598e-05, + "loss": 0.6663, + "step": 543 + }, + { + "epoch": 1.2224719101123596, + "grad_norm": 3.4155499935150146, + "learning_rate": 2.0382471528865537e-05, + "loss": 0.5047, + "step": 544 + }, + { + "epoch": 1.2247191011235956, + "grad_norm": 2.4931771755218506, + "learning_rate": 2.0320764973648166e-05, + "loss": 0.2306, + "step": 545 + }, + { + "epoch": 1.2269662921348314, + "grad_norm": 3.664468288421631, + "learning_rate": 2.0259071642317713e-05, + "loss": 0.7147, + "step": 546 + }, + { + "epoch": 1.2292134831460675, + "grad_norm": 0.5503119826316833, + "learning_rate": 2.0197392735968953e-05, + "loss": 0.0344, + "step": 547 + }, + { + "epoch": 1.2314606741573033, + "grad_norm": 2.96278977394104, + "learning_rate": 2.013572945541581e-05, + "loss": 0.4429, + "step": 548 + }, + { + "epoch": 1.2337078651685394, + "grad_norm": 3.6760940551757812, + "learning_rate": 2.0074083001167992e-05, + "loss": 0.6966, + "step": 549 + }, + { + "epoch": 1.2359550561797752, + "grad_norm": 2.8022167682647705, + "learning_rate": 2.0012454573407644e-05, + "loss": 0.6926, + "step": 550 + }, + { + "epoch": 1.2382022471910112, + "grad_norm": 2.4972310066223145, + "learning_rate": 1.995084537196594e-05, + "loss": 0.261, + "step": 551 + }, + { + "epoch": 1.2404494382022473, + "grad_norm": 3.387359142303467, + "learning_rate": 1.9889256596299725e-05, + "loss": 0.6558, + "step": 552 + }, + { + "epoch": 1.2426966292134831, + "grad_norm": 3.303635358810425, + "learning_rate": 1.9827689445468206e-05, + "loss": 0.6285, + "step": 553 + }, + { + "epoch": 1.2449438202247192, + "grad_norm": 2.8494338989257812, + "learning_rate": 1.9766145118109576e-05, + "loss": 0.6471, + "step": 554 + }, + { + "epoch": 1.247191011235955, + "grad_norm": 3.350094795227051, + "learning_rate": 1.9704624812417665e-05, + "loss": 0.4989, + "step": 555 + }, + { + "epoch": 1.249438202247191, + "grad_norm": 2.0344486236572266, + "learning_rate": 1.9643129726118646e-05, + "loss": 0.195, + "step": 556 + }, + { + "epoch": 1.2516853932584269, + "grad_norm": 4.317070007324219, + "learning_rate": 1.9581661056447698e-05, + "loss": 0.8431, + "step": 557 + }, + { + "epoch": 1.253932584269663, + "grad_norm": 3.8960471153259277, + "learning_rate": 1.95202200001257e-05, + "loss": 0.642, + "step": 558 + }, + { + "epoch": 1.256179775280899, + "grad_norm": 2.2174787521362305, + "learning_rate": 1.9458807753335924e-05, + "loss": 0.2251, + "step": 559 + }, + { + "epoch": 1.2584269662921348, + "grad_norm": 2.0047852993011475, + "learning_rate": 1.9397425511700762e-05, + "loss": 0.2057, + "step": 560 + }, + { + "epoch": 1.2606741573033708, + "grad_norm": 2.035231828689575, + "learning_rate": 1.933607447025845e-05, + "loss": 0.2198, + "step": 561 + }, + { + "epoch": 1.2629213483146067, + "grad_norm": 3.2453677654266357, + "learning_rate": 1.9274755823439777e-05, + "loss": 0.4856, + "step": 562 + }, + { + "epoch": 1.2651685393258427, + "grad_norm": 0.46472156047821045, + "learning_rate": 1.9213470765044864e-05, + "loss": 0.0273, + "step": 563 + }, + { + "epoch": 1.2674157303370785, + "grad_norm": 0.5257102251052856, + "learning_rate": 1.915222048821989e-05, + "loss": 0.0302, + "step": 564 + }, + { + "epoch": 1.2696629213483146, + "grad_norm": 2.3320302963256836, + "learning_rate": 1.9091006185433897e-05, + "loss": 0.1863, + "step": 565 + }, + { + "epoch": 1.2719101123595506, + "grad_norm": 4.208177089691162, + "learning_rate": 1.9029829048455534e-05, + "loss": 0.8053, + "step": 566 + }, + { + "epoch": 1.2741573033707865, + "grad_norm": 2.2916760444641113, + "learning_rate": 1.8968690268329893e-05, + "loss": 0.1935, + "step": 567 + }, + { + "epoch": 1.2764044943820225, + "grad_norm": 3.5239036083221436, + "learning_rate": 1.8907591035355305e-05, + "loss": 0.5837, + "step": 568 + }, + { + "epoch": 1.2786516853932584, + "grad_norm": 3.938844680786133, + "learning_rate": 1.884653253906016e-05, + "loss": 0.7606, + "step": 569 + }, + { + "epoch": 1.2808988764044944, + "grad_norm": 1.9056942462921143, + "learning_rate": 1.8785515968179746e-05, + "loss": 0.1904, + "step": 570 + }, + { + "epoch": 1.2831460674157302, + "grad_norm": 3.481647253036499, + "learning_rate": 1.8724542510633123e-05, + "loss": 0.6585, + "step": 571 + }, + { + "epoch": 1.2853932584269663, + "grad_norm": 3.712449550628662, + "learning_rate": 1.8663613353499996e-05, + "loss": 0.7043, + "step": 572 + }, + { + "epoch": 1.2876404494382023, + "grad_norm": 2.702223539352417, + "learning_rate": 1.8602729682997573e-05, + "loss": 0.6083, + "step": 573 + }, + { + "epoch": 1.2898876404494382, + "grad_norm": 3.8883962631225586, + "learning_rate": 1.854189268445751e-05, + "loss": 0.6523, + "step": 574 + }, + { + "epoch": 1.2921348314606742, + "grad_norm": 3.663496494293213, + "learning_rate": 1.8481103542302805e-05, + "loss": 0.553, + "step": 575 + }, + { + "epoch": 1.29438202247191, + "grad_norm": 3.476609945297241, + "learning_rate": 1.8420363440024752e-05, + "loss": 0.6234, + "step": 576 + }, + { + "epoch": 1.296629213483146, + "grad_norm": 2.72796368598938, + "learning_rate": 1.8359673560159906e-05, + "loss": 0.4428, + "step": 577 + }, + { + "epoch": 1.298876404494382, + "grad_norm": 3.249882698059082, + "learning_rate": 1.829903508426704e-05, + "loss": 0.5433, + "step": 578 + }, + { + "epoch": 1.301123595505618, + "grad_norm": 3.393094539642334, + "learning_rate": 1.823844919290417e-05, + "loss": 0.4937, + "step": 579 + }, + { + "epoch": 1.303370786516854, + "grad_norm": 2.7802557945251465, + "learning_rate": 1.817791706560553e-05, + "loss": 0.2222, + "step": 580 + }, + { + "epoch": 1.3056179775280898, + "grad_norm": 3.4187793731689453, + "learning_rate": 1.8117439880858653e-05, + "loss": 0.5672, + "step": 581 + }, + { + "epoch": 1.3078651685393259, + "grad_norm": 2.9662680625915527, + "learning_rate": 1.8057018816081404e-05, + "loss": 0.6562, + "step": 582 + }, + { + "epoch": 1.310112359550562, + "grad_norm": 0.4572099447250366, + "learning_rate": 1.7996655047599054e-05, + "loss": 0.056, + "step": 583 + }, + { + "epoch": 1.3123595505617978, + "grad_norm": 2.625009775161743, + "learning_rate": 1.7936349750621377e-05, + "loss": 0.4015, + "step": 584 + }, + { + "epoch": 1.3146067415730336, + "grad_norm": 3.479508399963379, + "learning_rate": 1.7876104099219804e-05, + "loss": 0.6675, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8350518941879272, + "eval_VitaminC_cosine_ap": 0.5499225365506326, + "eval_VitaminC_cosine_f1": 0.6721311475409836, + "eval_VitaminC_cosine_f1_threshold": 0.4279438257217407, + "eval_VitaminC_cosine_precision": 0.5114345114345115, + "eval_VitaminC_cosine_recall": 0.9800796812749004, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 320.0419616699219, + "eval_VitaminC_dot_ap": 0.532421197041184, + "eval_VitaminC_dot_f1": 0.6693989071038251, + "eval_VitaminC_dot_f1_threshold": 162.15530395507812, + "eval_VitaminC_dot_precision": 0.5093555093555093, + "eval_VitaminC_dot_recall": 0.9760956175298805, + "eval_VitaminC_euclidean_accuracy": 0.552734375, + "eval_VitaminC_euclidean_accuracy_threshold": 11.53189468383789, + "eval_VitaminC_euclidean_ap": 0.5507235346667002, + "eval_VitaminC_euclidean_f1": 0.665742024965326, + "eval_VitaminC_euclidean_f1_threshold": 20.513931274414062, + "eval_VitaminC_euclidean_precision": 0.5106382978723404, + "eval_VitaminC_euclidean_recall": 0.9561752988047809, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 236.3373260498047, + "eval_VitaminC_manhattan_ap": 0.5500919145575321, + "eval_VitaminC_manhattan_f1": 0.664886515353805, + "eval_VitaminC_manhattan_f1_threshold": 483.1922607421875, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 0.9920318725099602, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 320.0419616699219, + "eval_VitaminC_max_ap": 0.5507235346667002, + "eval_VitaminC_max_f1": 0.6721311475409836, + "eval_VitaminC_max_f1_threshold": 483.1922607421875, + "eval_VitaminC_max_precision": 0.5114345114345115, + "eval_VitaminC_max_recall": 0.9920318725099602, + "eval_sequential_score": 0.5507235346667002, + "eval_sts-test_pearson_cosine": 0.8854424628564648, + "eval_sts-test_pearson_dot": 0.8781775369503937, + "eval_sts-test_pearson_euclidean": 0.9073948686610891, + "eval_sts-test_pearson_manhattan": 0.9077723844704348, + "eval_sts-test_pearson_max": 0.9077723844704348, + "eval_sts-test_spearman_cosine": 0.9079970522112082, + "eval_sts-test_spearman_dot": 0.8790970008634722, + "eval_sts-test_spearman_euclidean": 0.9029484386573375, + "eval_sts-test_spearman_manhattan": 0.9037780375410113, + "eval_sts-test_spearman_max": 0.9079970522112082, + "eval_vitaminc-pairs_loss": 1.3895310163497925, + "eval_vitaminc-pairs_runtime": 1.8903, + "eval_vitaminc-pairs_samples_per_second": 57.134, + "eval_vitaminc-pairs_steps_per_second": 1.058, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_negation-triplets_loss": 0.9072961807250977, + "eval_negation-triplets_runtime": 0.3009, + "eval_negation-triplets_samples_per_second": 212.728, + "eval_negation-triplets_steps_per_second": 3.324, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_scitail-pairs-pos_loss": 0.09969545155763626, + "eval_scitail-pairs-pos_runtime": 0.3958, + "eval_scitail-pairs-pos_samples_per_second": 136.434, + "eval_scitail-pairs-pos_steps_per_second": 2.527, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_scitail-pairs-qa_loss": 0.0006847005570307374, + "eval_scitail-pairs-qa_runtime": 0.5306, + "eval_scitail-pairs-qa_samples_per_second": 241.242, + "eval_scitail-pairs-qa_steps_per_second": 3.769, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_xsum-pairs_loss": 0.02075883559882641, + "eval_xsum-pairs_runtime": 2.7478, + "eval_xsum-pairs_samples_per_second": 46.583, + "eval_xsum-pairs_steps_per_second": 0.728, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_sciq_pairs_loss": 0.01607341691851616, + "eval_sciq_pairs_runtime": 2.8547, + "eval_sciq_pairs_samples_per_second": 44.838, + "eval_sciq_pairs_steps_per_second": 0.701, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_qasc_pairs_loss": 0.09826790541410446, + "eval_qasc_pairs_runtime": 0.6743, + "eval_qasc_pairs_samples_per_second": 189.831, + "eval_qasc_pairs_steps_per_second": 2.966, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_openbookqa_pairs_loss": 0.7106958031654358, + "eval_openbookqa_pairs_runtime": 0.6028, + "eval_openbookqa_pairs_samples_per_second": 212.347, + "eval_openbookqa_pairs_steps_per_second": 3.318, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_msmarco_pairs_loss": 0.14438961446285248, + "eval_msmarco_pairs_runtime": 1.4968, + "eval_msmarco_pairs_samples_per_second": 85.516, + "eval_msmarco_pairs_steps_per_second": 1.336, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_nq_pairs_loss": 0.08692270517349243, + "eval_nq_pairs_runtime": 2.3567, + "eval_nq_pairs_samples_per_second": 54.312, + "eval_nq_pairs_steps_per_second": 0.849, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_trivia_pairs_loss": 0.4988090991973877, + "eval_trivia_pairs_runtime": 3.5886, + "eval_trivia_pairs_samples_per_second": 35.669, + "eval_trivia_pairs_steps_per_second": 0.557, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_gooaq_pairs_loss": 0.2779709994792938, + "eval_gooaq_pairs_runtime": 0.9125, + "eval_gooaq_pairs_samples_per_second": 140.269, + "eval_gooaq_pairs_steps_per_second": 2.192, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_paws-pos_loss": 0.024158792570233345, + "eval_paws-pos_runtime": 0.7009, + "eval_paws-pos_samples_per_second": 182.63, + "eval_paws-pos_steps_per_second": 2.854, + "step": 585 + }, + { + "epoch": 1.3168539325842696, + "grad_norm": 3.7084991931915283, + "learning_rate": 1.781591926630451e-05, + "loss": 0.7298, + "step": 586 + }, + { + "epoch": 1.3191011235955057, + "grad_norm": 0.33646777272224426, + "learning_rate": 1.7755796423601624e-05, + "loss": 0.0372, + "step": 587 + }, + { + "epoch": 1.3213483146067415, + "grad_norm": 3.1976261138916016, + "learning_rate": 1.7695736741630386e-05, + "loss": 0.7247, + "step": 588 + }, + { + "epoch": 1.3235955056179776, + "grad_norm": 3.706315040588379, + "learning_rate": 1.7635741389680394e-05, + "loss": 0.6839, + "step": 589 + }, + { + "epoch": 1.3258426966292136, + "grad_norm": 3.0135083198547363, + "learning_rate": 1.75758115357888e-05, + "loss": 0.6848, + "step": 590 + }, + { + "epoch": 1.3280898876404494, + "grad_norm": 3.324838876724243, + "learning_rate": 1.751594834671759e-05, + "loss": 0.4449, + "step": 591 + }, + { + "epoch": 1.3303370786516853, + "grad_norm": 2.1507813930511475, + "learning_rate": 1.7456152987930877e-05, + "loss": 0.2104, + "step": 592 + }, + { + "epoch": 1.3325842696629213, + "grad_norm": 2.267099380493164, + "learning_rate": 1.73964266235722e-05, + "loss": 0.391, + "step": 593 + }, + { + "epoch": 1.3348314606741574, + "grad_norm": 2.256056308746338, + "learning_rate": 1.7336770416441832e-05, + "loss": 0.3641, + "step": 594 + }, + { + "epoch": 1.3370786516853932, + "grad_norm": 3.728010416030884, + "learning_rate": 1.7277185527974203e-05, + "loss": 0.6953, + "step": 595 + }, + { + "epoch": 1.3393258426966292, + "grad_norm": 3.9698784351348877, + "learning_rate": 1.7217673118215237e-05, + "loss": 0.6382, + "step": 596 + }, + { + "epoch": 1.3415730337078653, + "grad_norm": 3.2924447059631348, + "learning_rate": 1.7158234345799802e-05, + "loss": 0.6245, + "step": 597 + }, + { + "epoch": 1.3438202247191011, + "grad_norm": 3.6384663581848145, + "learning_rate": 1.7098870367929108e-05, + "loss": 0.6775, + "step": 598 + }, + { + "epoch": 1.346067415730337, + "grad_norm": 3.9194507598876953, + "learning_rate": 1.703958234034823e-05, + "loss": 0.5727, + "step": 599 + }, + { + "epoch": 1.348314606741573, + "grad_norm": 0.4283960163593292, + "learning_rate": 1.698037141732357e-05, + "loss": 0.0567, + "step": 600 + }, + { + "epoch": 1.350561797752809, + "grad_norm": 3.536198377609253, + "learning_rate": 1.6921238751620402e-05, + "loss": 0.6258, + "step": 601 + }, + { + "epoch": 1.3528089887640449, + "grad_norm": 4.333379745483398, + "learning_rate": 1.6862185494480425e-05, + "loss": 0.8138, + "step": 602 + }, + { + "epoch": 1.355056179775281, + "grad_norm": 3.6202950477600098, + "learning_rate": 1.680321279559934e-05, + "loss": 0.6099, + "step": 603 + }, + { + "epoch": 1.357303370786517, + "grad_norm": 3.049006938934326, + "learning_rate": 1.6744321803104493e-05, + "loss": 0.6801, + "step": 604 + }, + { + "epoch": 1.3595505617977528, + "grad_norm": 6.1929850578308105, + "learning_rate": 1.668551366353248e-05, + "loss": 2.2003, + "step": 605 + }, + { + "epoch": 1.3617977528089886, + "grad_norm": 0.40746456384658813, + "learning_rate": 1.662678952180688e-05, + "loss": 0.052, + "step": 606 + }, + { + "epoch": 1.3640449438202247, + "grad_norm": 2.2396955490112305, + "learning_rate": 1.656815052121592e-05, + "loss": 0.2175, + "step": 607 + }, + { + "epoch": 1.3662921348314607, + "grad_norm": 4.395736217498779, + "learning_rate": 1.6509597803390222e-05, + "loss": 0.7671, + "step": 608 + }, + { + "epoch": 1.3685393258426966, + "grad_norm": 3.2075412273406982, + "learning_rate": 1.6451132508280602e-05, + "loss": 0.5524, + "step": 609 + }, + { + "epoch": 1.3707865168539326, + "grad_norm": 3.6282618045806885, + "learning_rate": 1.639275577413586e-05, + "loss": 0.5868, + "step": 610 + }, + { + "epoch": 1.3730337078651687, + "grad_norm": 2.8120224475860596, + "learning_rate": 1.6334468737480616e-05, + "loss": 0.6628, + "step": 611 + }, + { + "epoch": 1.3752808988764045, + "grad_norm": 4.477191925048828, + "learning_rate": 1.6276272533093184e-05, + "loss": 0.8106, + "step": 612 + }, + { + "epoch": 1.3775280898876405, + "grad_norm": 2.078286647796631, + "learning_rate": 1.621816829398349e-05, + "loss": 0.2, + "step": 613 + }, + { + "epoch": 1.3797752808988764, + "grad_norm": 3.698269844055176, + "learning_rate": 1.616015715137101e-05, + "loss": 0.57, + "step": 614 + }, + { + "epoch": 1.3820224719101124, + "grad_norm": 3.7442100048065186, + "learning_rate": 1.6102240234662735e-05, + "loss": 0.6329, + "step": 615 + }, + { + "epoch": 1.3842696629213482, + "grad_norm": 3.1588127613067627, + "learning_rate": 1.6044418671431197e-05, + "loss": 0.5616, + "step": 616 + }, + { + "epoch": 1.3865168539325843, + "grad_norm": 2.8839151859283447, + "learning_rate": 1.5986693587392505e-05, + "loss": 0.6678, + "step": 617 + }, + { + "epoch": 1.3887640449438203, + "grad_norm": 3.6140055656433105, + "learning_rate": 1.5929066106384448e-05, + "loss": 0.454, + "step": 618 + }, + { + "epoch": 1.3910112359550562, + "grad_norm": 3.0111019611358643, + "learning_rate": 1.5871537350344574e-05, + "loss": 0.5198, + "step": 619 + }, + { + "epoch": 1.3932584269662922, + "grad_norm": 3.6177947521209717, + "learning_rate": 1.581410843928841e-05, + "loss": 0.5259, + "step": 620 + }, + { + "epoch": 1.395505617977528, + "grad_norm": 4.257228374481201, + "learning_rate": 1.5756780491287593e-05, + "loss": 0.714, + "step": 621 + }, + { + "epoch": 1.397752808988764, + "grad_norm": 3.3783249855041504, + "learning_rate": 1.5699554622448154e-05, + "loss": 0.4943, + "step": 622 + }, + { + "epoch": 1.4, + "grad_norm": 0.527916431427002, + "learning_rate": 1.5642431946888743e-05, + "loss": 0.0324, + "step": 623 + }, + { + "epoch": 1.402247191011236, + "grad_norm": 0.4558711349964142, + "learning_rate": 1.5585413576718978e-05, + "loss": 0.0305, + "step": 624 + }, + { + "epoch": 1.404494382022472, + "grad_norm": 3.383774757385254, + "learning_rate": 1.5528500622017773e-05, + "loss": 0.5194, + "step": 625 + }, + { + "epoch": 1.4067415730337078, + "grad_norm": 3.434595823287964, + "learning_rate": 1.5471694190811722e-05, + "loss": 0.5412, + "step": 626 + }, + { + "epoch": 1.4089887640449439, + "grad_norm": 3.0937981605529785, + "learning_rate": 1.5414995389053546e-05, + "loss": 0.5688, + "step": 627 + }, + { + "epoch": 1.4112359550561797, + "grad_norm": 3.6951913833618164, + "learning_rate": 1.5358405320600546e-05, + "loss": 0.7636, + "step": 628 + }, + { + "epoch": 1.4134831460674158, + "grad_norm": 3.196275472640991, + "learning_rate": 1.530192508719312e-05, + "loss": 0.478, + "step": 629 + }, + { + "epoch": 1.4157303370786516, + "grad_norm": 3.235971212387085, + "learning_rate": 1.5245555788433292e-05, + "loss": 0.5674, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8177632093429565, + "eval_VitaminC_cosine_ap": 0.5552558686648947, + "eval_VitaminC_cosine_f1": 0.6657789613848203, + "eval_VitaminC_cosine_f1_threshold": 0.324923038482666, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 0.9960159362549801, + "eval_VitaminC_dot_accuracy": 0.55859375, + "eval_VitaminC_dot_accuracy_threshold": 289.75390625, + "eval_VitaminC_dot_ap": 0.5382889477754735, + "eval_VitaminC_dot_f1": 0.6693657219973009, + "eval_VitaminC_dot_f1_threshold": 149.46510314941406, + "eval_VitaminC_dot_precision": 0.5061224489795918, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.5625, + "eval_VitaminC_euclidean_accuracy_threshold": 15.008248329162598, + "eval_VitaminC_euclidean_ap": 0.555529307254583, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 19.720703125, + "eval_VitaminC_euclidean_precision": 0.5140388768898488, + "eval_VitaminC_euclidean_recall": 0.9482071713147411, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 241.89620971679688, + "eval_VitaminC_manhattan_ap": 0.5565558085377883, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 509.21246337890625, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.5625, + "eval_VitaminC_max_accuracy_threshold": 289.75390625, + "eval_VitaminC_max_ap": 0.5565558085377883, + "eval_VitaminC_max_f1": 0.6693657219973009, + "eval_VitaminC_max_f1_threshold": 509.21246337890625, + "eval_VitaminC_max_precision": 0.5140388768898488, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5565558085377883, + "eval_sts-test_pearson_cosine": 0.8842482218274867, + "eval_sts-test_pearson_dot": 0.8750877929290466, + "eval_sts-test_pearson_euclidean": 0.9061417543180812, + "eval_sts-test_pearson_manhattan": 0.907116350231697, + "eval_sts-test_pearson_max": 0.907116350231697, + "eval_sts-test_spearman_cosine": 0.9058163882415199, + "eval_sts-test_spearman_dot": 0.8768090478735754, + "eval_sts-test_spearman_euclidean": 0.9019257380807598, + "eval_sts-test_spearman_manhattan": 0.9032297003351825, + "eval_sts-test_spearman_max": 0.9058163882415199, + "eval_vitaminc-pairs_loss": 1.419171690940857, + "eval_vitaminc-pairs_runtime": 1.9053, + "eval_vitaminc-pairs_samples_per_second": 56.684, + "eval_vitaminc-pairs_steps_per_second": 1.05, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_negation-triplets_loss": 0.93497234582901, + "eval_negation-triplets_runtime": 0.3078, + "eval_negation-triplets_samples_per_second": 207.955, + "eval_negation-triplets_steps_per_second": 3.249, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_scitail-pairs-pos_loss": 0.10255613923072815, + "eval_scitail-pairs-pos_runtime": 0.3984, + "eval_scitail-pairs-pos_samples_per_second": 135.532, + "eval_scitail-pairs-pos_steps_per_second": 2.51, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_scitail-pairs-qa_loss": 0.0005886165308766067, + "eval_scitail-pairs-qa_runtime": 0.5368, + "eval_scitail-pairs-qa_samples_per_second": 238.448, + "eval_scitail-pairs-qa_steps_per_second": 3.726, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_xsum-pairs_loss": 0.02434428222477436, + "eval_xsum-pairs_runtime": 2.7476, + "eval_xsum-pairs_samples_per_second": 46.586, + "eval_xsum-pairs_steps_per_second": 0.728, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_sciq_pairs_loss": 0.01709047146141529, + "eval_sciq_pairs_runtime": 2.8888, + "eval_sciq_pairs_samples_per_second": 44.31, + "eval_sciq_pairs_steps_per_second": 0.692, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_qasc_pairs_loss": 0.08857370167970657, + "eval_qasc_pairs_runtime": 0.662, + "eval_qasc_pairs_samples_per_second": 193.367, + "eval_qasc_pairs_steps_per_second": 3.021, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_openbookqa_pairs_loss": 0.7429466843605042, + "eval_openbookqa_pairs_runtime": 0.5899, + "eval_openbookqa_pairs_samples_per_second": 216.996, + "eval_openbookqa_pairs_steps_per_second": 3.391, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_msmarco_pairs_loss": 0.14679844677448273, + "eval_msmarco_pairs_runtime": 1.4975, + "eval_msmarco_pairs_samples_per_second": 85.478, + "eval_msmarco_pairs_steps_per_second": 1.336, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_nq_pairs_loss": 0.08464788645505905, + "eval_nq_pairs_runtime": 2.3677, + "eval_nq_pairs_samples_per_second": 54.06, + "eval_nq_pairs_steps_per_second": 0.845, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_trivia_pairs_loss": 0.49148350954055786, + "eval_trivia_pairs_runtime": 3.5901, + "eval_trivia_pairs_samples_per_second": 35.654, + "eval_trivia_pairs_steps_per_second": 0.557, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_gooaq_pairs_loss": 0.2605786621570587, + "eval_gooaq_pairs_runtime": 0.9378, + "eval_gooaq_pairs_samples_per_second": 136.484, + "eval_gooaq_pairs_steps_per_second": 2.133, + "step": 630 + }, + { + "epoch": 1.4157303370786516, + "eval_paws-pos_loss": 0.02402164228260517, + "eval_paws-pos_runtime": 0.707, + "eval_paws-pos_samples_per_second": 181.047, + "eval_paws-pos_steps_per_second": 2.829, + "step": 630 + }, + { + "epoch": 1.4179775280898876, + "grad_norm": 4.603442192077637, + "learning_rate": 1.5189298521763352e-05, + "loss": 0.9232, + "step": 631 + }, + { + "epoch": 1.4202247191011237, + "grad_norm": 3.214153289794922, + "learning_rate": 1.5133154382444443e-05, + "loss": 0.613, + "step": 632 + }, + { + "epoch": 1.4224719101123595, + "grad_norm": 3.1973836421966553, + "learning_rate": 1.5077124463535252e-05, + "loss": 0.5689, + "step": 633 + }, + { + "epoch": 1.4247191011235956, + "grad_norm": 2.985643148422241, + "learning_rate": 1.5021209855870732e-05, + "loss": 0.4126, + "step": 634 + }, + { + "epoch": 1.4269662921348314, + "grad_norm": 2.1112682819366455, + "learning_rate": 1.4965411648040885e-05, + "loss": 0.2148, + "step": 635 + }, + { + "epoch": 1.4292134831460674, + "grad_norm": 2.8638250827789307, + "learning_rate": 1.4909730926369527e-05, + "loss": 0.7029, + "step": 636 + }, + { + "epoch": 1.4314606741573033, + "grad_norm": 2.3235864639282227, + "learning_rate": 1.4854168774893162e-05, + "loss": 0.3989, + "step": 637 + }, + { + "epoch": 1.4337078651685393, + "grad_norm": 3.925884962081909, + "learning_rate": 1.4798726275339885e-05, + "loss": 0.6291, + "step": 638 + }, + { + "epoch": 1.4359550561797754, + "grad_norm": 1.7517139911651611, + "learning_rate": 1.4743404507108308e-05, + "loss": 0.158, + "step": 639 + }, + { + "epoch": 1.4382022471910112, + "grad_norm": 3.1826531887054443, + "learning_rate": 1.4688204547246553e-05, + "loss": 0.4833, + "step": 640 + }, + { + "epoch": 1.4404494382022472, + "grad_norm": 0.4674391448497772, + "learning_rate": 1.4633127470431268e-05, + "loss": 0.0561, + "step": 641 + }, + { + "epoch": 1.442696629213483, + "grad_norm": 3.3981146812438965, + "learning_rate": 1.4578174348946728e-05, + "loss": 0.6613, + "step": 642 + }, + { + "epoch": 1.4449438202247191, + "grad_norm": 1.869647741317749, + "learning_rate": 1.4523346252663953e-05, + "loss": 0.1917, + "step": 643 + }, + { + "epoch": 1.447191011235955, + "grad_norm": 3.705204725265503, + "learning_rate": 1.4468644249019847e-05, + "loss": 0.5755, + "step": 644 + }, + { + "epoch": 1.449438202247191, + "grad_norm": 3.3335964679718018, + "learning_rate": 1.4414069402996472e-05, + "loss": 0.5609, + "step": 645 + }, + { + "epoch": 1.451685393258427, + "grad_norm": 3.096312999725342, + "learning_rate": 1.4359622777100265e-05, + "loss": 0.5407, + "step": 646 + }, + { + "epoch": 1.4539325842696629, + "grad_norm": 0.3230539858341217, + "learning_rate": 1.430530543134139e-05, + "loss": 0.0455, + "step": 647 + }, + { + "epoch": 1.456179775280899, + "grad_norm": 3.2401561737060547, + "learning_rate": 1.425111842321305e-05, + "loss": 0.6599, + "step": 648 + }, + { + "epoch": 1.4584269662921348, + "grad_norm": 3.941396474838257, + "learning_rate": 1.4197062807670971e-05, + "loss": 0.6952, + "step": 649 + }, + { + "epoch": 1.4606741573033708, + "grad_norm": 0.5517943501472473, + "learning_rate": 1.4143139637112801e-05, + "loss": 0.0329, + "step": 650 + }, + { + "epoch": 1.4629213483146066, + "grad_norm": 3.1321299076080322, + "learning_rate": 1.4089349961357648e-05, + "loss": 0.6939, + "step": 651 + }, + { + "epoch": 1.4651685393258427, + "grad_norm": 3.5495405197143555, + "learning_rate": 1.403569482762563e-05, + "loss": 0.4664, + "step": 652 + }, + { + "epoch": 1.4674157303370787, + "grad_norm": 3.9030630588531494, + "learning_rate": 1.3982175280517512e-05, + "loss": 0.6686, + "step": 653 + }, + { + "epoch": 1.4696629213483146, + "grad_norm": 3.5179669857025146, + "learning_rate": 1.3928792361994335e-05, + "loss": 0.6167, + "step": 654 + }, + { + "epoch": 1.4719101123595506, + "grad_norm": 3.5082061290740967, + "learning_rate": 1.3875547111357156e-05, + "loss": 0.6612, + "step": 655 + }, + { + "epoch": 1.4741573033707867, + "grad_norm": 4.214035511016846, + "learning_rate": 1.382244056522679e-05, + "loss": 0.8139, + "step": 656 + }, + { + "epoch": 1.4764044943820225, + "grad_norm": 3.5473194122314453, + "learning_rate": 1.3769473757523668e-05, + "loss": 0.6813, + "step": 657 + }, + { + "epoch": 1.4786516853932583, + "grad_norm": 3.59326171875, + "learning_rate": 1.3716647719447648e-05, + "loss": 0.6031, + "step": 658 + }, + { + "epoch": 1.4808988764044944, + "grad_norm": 2.1092872619628906, + "learning_rate": 1.3663963479458006e-05, + "loss": 0.1783, + "step": 659 + }, + { + "epoch": 1.4831460674157304, + "grad_norm": 2.6726906299591064, + "learning_rate": 1.3611422063253356e-05, + "loss": 0.6536, + "step": 660 + }, + { + "epoch": 1.4853932584269662, + "grad_norm": 0.31012779474258423, + "learning_rate": 1.3559024493751731e-05, + "loss": 0.0318, + "step": 661 + }, + { + "epoch": 1.4876404494382023, + "grad_norm": 2.775973320007324, + "learning_rate": 1.3506771791070609e-05, + "loss": 0.6372, + "step": 662 + }, + { + "epoch": 1.4898876404494383, + "grad_norm": 3.1341476440429688, + "learning_rate": 1.3454664972507112e-05, + "loss": 0.5695, + "step": 663 + }, + { + "epoch": 1.4921348314606742, + "grad_norm": 5.889291763305664, + "learning_rate": 1.3402705052518162e-05, + "loss": 2.3259, + "step": 664 + }, + { + "epoch": 1.49438202247191, + "grad_norm": 2.9970600605010986, + "learning_rate": 1.3350893042700749e-05, + "loss": 0.4342, + "step": 665 + }, + { + "epoch": 1.496629213483146, + "grad_norm": 2.2179033756256104, + "learning_rate": 1.329922995177222e-05, + "loss": 0.2176, + "step": 666 + }, + { + "epoch": 1.498876404494382, + "grad_norm": 3.6805996894836426, + "learning_rate": 1.324771678555067e-05, + "loss": 0.5419, + "step": 667 + }, + { + "epoch": 1.501123595505618, + "grad_norm": 3.182274103164673, + "learning_rate": 1.3196354546935319e-05, + "loss": 0.4976, + "step": 668 + }, + { + "epoch": 1.503370786516854, + "grad_norm": 1.9249639511108398, + "learning_rate": 1.3145144235887022e-05, + "loss": 0.1964, + "step": 669 + }, + { + "epoch": 1.50561797752809, + "grad_norm": 0.4039813280105591, + "learning_rate": 1.3094086849408782e-05, + "loss": 0.0311, + "step": 670 + }, + { + "epoch": 1.5078651685393258, + "grad_norm": 1.8193391561508179, + "learning_rate": 1.3043183381526351e-05, + "loss": 0.1832, + "step": 671 + }, + { + "epoch": 1.5101123595505617, + "grad_norm": 0.25979480147361755, + "learning_rate": 1.2992434823268868e-05, + "loss": 0.0345, + "step": 672 + }, + { + "epoch": 1.5123595505617977, + "grad_norm": 3.0886144638061523, + "learning_rate": 1.2941842162649562e-05, + "loss": 0.5376, + "step": 673 + }, + { + "epoch": 1.5146067415730338, + "grad_norm": 3.398355484008789, + "learning_rate": 1.289140638464653e-05, + "loss": 0.6316, + "step": 674 + }, + { + "epoch": 1.5168539325842696, + "grad_norm": 3.3084335327148438, + "learning_rate": 1.284112847118356e-05, + "loss": 0.5025, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.6423732042312622, + "eval_VitaminC_cosine_ap": 0.5559419972419435, + "eval_VitaminC_cosine_f1": 0.6693548387096774, + "eval_VitaminC_cosine_f1_threshold": 0.3631063997745514, + "eval_VitaminC_cosine_precision": 0.5050709939148073, + "eval_VitaminC_cosine_recall": 0.9920318725099602, + "eval_VitaminC_dot_accuracy": 0.552734375, + "eval_VitaminC_dot_accuracy_threshold": 314.47454833984375, + "eval_VitaminC_dot_ap": 0.5368600163832479, + "eval_VitaminC_dot_f1": 0.6720867208672087, + "eval_VitaminC_dot_f1_threshold": 146.22268676757812, + "eval_VitaminC_dot_precision": 0.5092402464065708, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.560546875, + "eval_VitaminC_euclidean_accuracy_threshold": 15.797218322753906, + "eval_VitaminC_euclidean_ap": 0.5588590475965204, + "eval_VitaminC_euclidean_f1": 0.6666666666666667, + "eval_VitaminC_euclidean_f1_threshold": 22.122737884521484, + "eval_VitaminC_euclidean_precision": 0.5040816326530613, + "eval_VitaminC_euclidean_recall": 0.9840637450199203, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 290.7606201171875, + "eval_VitaminC_manhattan_ap": 0.5586013714034852, + "eval_VitaminC_manhattan_f1": 0.6657381615598886, + "eval_VitaminC_manhattan_f1_threshold": 422.6849365234375, + "eval_VitaminC_manhattan_precision": 0.5117773019271948, + "eval_VitaminC_manhattan_recall": 0.952191235059761, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 314.47454833984375, + "eval_VitaminC_max_ap": 0.5588590475965204, + "eval_VitaminC_max_f1": 0.6720867208672087, + "eval_VitaminC_max_f1_threshold": 422.6849365234375, + "eval_VitaminC_max_precision": 0.5117773019271948, + "eval_VitaminC_max_recall": 0.9920318725099602, + "eval_sequential_score": 0.5588590475965204, + "eval_sts-test_pearson_cosine": 0.8865581717304933, + "eval_sts-test_pearson_dot": 0.8767512116280293, + "eval_sts-test_pearson_euclidean": 0.9075796979057666, + "eval_sts-test_pearson_manhattan": 0.9085979238979228, + "eval_sts-test_pearson_max": 0.9085979238979228, + "eval_sts-test_spearman_cosine": 0.906657109942627, + "eval_sts-test_spearman_dot": 0.8758823835039438, + "eval_sts-test_spearman_euclidean": 0.9031548314748749, + "eval_sts-test_spearman_manhattan": 0.9039239266531472, + "eval_sts-test_spearman_max": 0.906657109942627, + "eval_vitaminc-pairs_loss": 1.3484903573989868, + "eval_vitaminc-pairs_runtime": 1.897, + "eval_vitaminc-pairs_samples_per_second": 56.932, + "eval_vitaminc-pairs_steps_per_second": 1.054, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_negation-triplets_loss": 0.907042920589447, + "eval_negation-triplets_runtime": 0.3044, + "eval_negation-triplets_samples_per_second": 210.234, + "eval_negation-triplets_steps_per_second": 3.285, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_scitail-pairs-pos_loss": 0.0944281667470932, + "eval_scitail-pairs-pos_runtime": 0.4051, + "eval_scitail-pairs-pos_samples_per_second": 133.286, + "eval_scitail-pairs-pos_steps_per_second": 2.468, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_scitail-pairs-qa_loss": 0.0006068490329198539, + "eval_scitail-pairs-qa_runtime": 0.5379, + "eval_scitail-pairs-qa_samples_per_second": 237.942, + "eval_scitail-pairs-qa_steps_per_second": 3.718, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_xsum-pairs_loss": 0.02087360806763172, + "eval_xsum-pairs_runtime": 2.7473, + "eval_xsum-pairs_samples_per_second": 46.592, + "eval_xsum-pairs_steps_per_second": 0.728, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_sciq_pairs_loss": 0.016422858461737633, + "eval_sciq_pairs_runtime": 2.9202, + "eval_sciq_pairs_samples_per_second": 43.832, + "eval_sciq_pairs_steps_per_second": 0.685, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_qasc_pairs_loss": 0.0907311737537384, + "eval_qasc_pairs_runtime": 0.663, + "eval_qasc_pairs_samples_per_second": 193.059, + "eval_qasc_pairs_steps_per_second": 3.017, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_openbookqa_pairs_loss": 0.74045729637146, + "eval_openbookqa_pairs_runtime": 0.5939, + "eval_openbookqa_pairs_samples_per_second": 215.54, + "eval_openbookqa_pairs_steps_per_second": 3.368, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_msmarco_pairs_loss": 0.14498473703861237, + "eval_msmarco_pairs_runtime": 1.4994, + "eval_msmarco_pairs_samples_per_second": 85.365, + "eval_msmarco_pairs_steps_per_second": 1.334, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_nq_pairs_loss": 0.08948007225990295, + "eval_nq_pairs_runtime": 2.3655, + "eval_nq_pairs_samples_per_second": 54.11, + "eval_nq_pairs_steps_per_second": 0.845, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_trivia_pairs_loss": 0.5190236568450928, + "eval_trivia_pairs_runtime": 3.6004, + "eval_trivia_pairs_samples_per_second": 35.552, + "eval_trivia_pairs_steps_per_second": 0.555, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_gooaq_pairs_loss": 0.253094881772995, + "eval_gooaq_pairs_runtime": 0.9388, + "eval_gooaq_pairs_samples_per_second": 136.338, + "eval_gooaq_pairs_steps_per_second": 2.13, + "step": 675 + }, + { + "epoch": 1.5168539325842696, + "eval_paws-pos_loss": 0.023870982229709625, + "eval_paws-pos_runtime": 0.707, + "eval_paws-pos_samples_per_second": 181.056, + "eval_paws-pos_steps_per_second": 2.829, + "step": 675 + }, + { + "epoch": 1.5191011235955056, + "grad_norm": 2.906019449234009, + "learning_rate": 1.2791009401110988e-05, + "loss": 0.509, + "step": 676 + }, + { + "epoch": 1.5213483146067417, + "grad_norm": 3.241637706756592, + "learning_rate": 1.2741050150186684e-05, + "loss": 0.6078, + "step": 677 + }, + { + "epoch": 1.5235955056179775, + "grad_norm": 2.7639479637145996, + "learning_rate": 1.2691251691057008e-05, + "loss": 0.3961, + "step": 678 + }, + { + "epoch": 1.5258426966292133, + "grad_norm": 3.347423553466797, + "learning_rate": 1.264161499323793e-05, + "loss": 0.5699, + "step": 679 + }, + { + "epoch": 1.5280898876404494, + "grad_norm": 3.417011022567749, + "learning_rate": 1.2592141023096081e-05, + "loss": 0.6305, + "step": 680 + }, + { + "epoch": 1.5303370786516854, + "grad_norm": 3.702047348022461, + "learning_rate": 1.2542830743830018e-05, + "loss": 0.5886, + "step": 681 + }, + { + "epoch": 1.5325842696629213, + "grad_norm": 2.654712438583374, + "learning_rate": 1.2493685115451417e-05, + "loss": 0.6432, + "step": 682 + }, + { + "epoch": 1.5348314606741573, + "grad_norm": 3.164024829864502, + "learning_rate": 1.2444705094766392e-05, + "loss": 0.614, + "step": 683 + }, + { + "epoch": 1.5370786516853934, + "grad_norm": 0.31392890214920044, + "learning_rate": 1.2395891635356883e-05, + "loss": 0.0432, + "step": 684 + }, + { + "epoch": 1.5393258426966292, + "grad_norm": 2.7912144660949707, + "learning_rate": 1.2347245687562097e-05, + "loss": 0.633, + "step": 685 + }, + { + "epoch": 1.541573033707865, + "grad_norm": 3.678194999694824, + "learning_rate": 1.229876819845997e-05, + "loss": 0.6228, + "step": 686 + }, + { + "epoch": 1.543820224719101, + "grad_norm": 2.2250537872314453, + "learning_rate": 1.2250460111848757e-05, + "loss": 0.2105, + "step": 687 + }, + { + "epoch": 1.5460674157303371, + "grad_norm": 3.0338246822357178, + "learning_rate": 1.2202322368228655e-05, + "loss": 0.5429, + "step": 688 + }, + { + "epoch": 1.548314606741573, + "grad_norm": 3.2483761310577393, + "learning_rate": 1.2154355904783493e-05, + "loss": 0.5361, + "step": 689 + }, + { + "epoch": 1.550561797752809, + "grad_norm": 3.114161491394043, + "learning_rate": 1.2106561655362458e-05, + "loss": 0.5567, + "step": 690 + }, + { + "epoch": 1.552808988764045, + "grad_norm": 3.2030935287475586, + "learning_rate": 1.2058940550461972e-05, + "loss": 0.6131, + "step": 691 + }, + { + "epoch": 1.5550561797752809, + "grad_norm": 3.438405990600586, + "learning_rate": 1.201149351720751e-05, + "loss": 0.5111, + "step": 692 + }, + { + "epoch": 1.5573033707865167, + "grad_norm": 3.1341006755828857, + "learning_rate": 1.1964221479335612e-05, + "loss": 0.6216, + "step": 693 + }, + { + "epoch": 1.5595505617977528, + "grad_norm": 4.309159278869629, + "learning_rate": 1.1917125357175833e-05, + "loss": 0.7615, + "step": 694 + }, + { + "epoch": 1.5617977528089888, + "grad_norm": 3.412702798843384, + "learning_rate": 1.1870206067632897e-05, + "loss": 0.51, + "step": 695 + }, + { + "epoch": 1.5640449438202246, + "grad_norm": 2.8780102729797363, + "learning_rate": 1.1823464524168793e-05, + "loss": 0.6989, + "step": 696 + }, + { + "epoch": 1.5662921348314607, + "grad_norm": 3.7508151531219482, + "learning_rate": 1.1776901636785002e-05, + "loss": 0.8145, + "step": 697 + }, + { + "epoch": 1.5685393258426967, + "grad_norm": 3.1193883419036865, + "learning_rate": 1.1730518312004793e-05, + "loss": 0.5928, + "step": 698 + }, + { + "epoch": 1.5707865168539326, + "grad_norm": 3.2519209384918213, + "learning_rate": 1.1684315452855578e-05, + "loss": 0.6046, + "step": 699 + }, + { + "epoch": 1.5730337078651684, + "grad_norm": 2.7838470935821533, + "learning_rate": 1.163829395885131e-05, + "loss": 0.6483, + "step": 700 + }, + { + "epoch": 1.5752808988764047, + "grad_norm": 2.958461046218872, + "learning_rate": 1.159245472597498e-05, + "loss": 0.3976, + "step": 701 + }, + { + "epoch": 1.5775280898876405, + "grad_norm": 2.3022193908691406, + "learning_rate": 1.1546798646661172e-05, + "loss": 0.2033, + "step": 702 + }, + { + "epoch": 1.5797752808988763, + "grad_norm": 2.9034605026245117, + "learning_rate": 1.1501326609778704e-05, + "loss": 0.4127, + "step": 703 + }, + { + "epoch": 1.5820224719101124, + "grad_norm": 3.1683311462402344, + "learning_rate": 1.1456039500613304e-05, + "loss": 0.6008, + "step": 704 + }, + { + "epoch": 1.5842696629213484, + "grad_norm": 3.1939926147460938, + "learning_rate": 1.1410938200850377e-05, + "loss": 0.5346, + "step": 705 + }, + { + "epoch": 1.5865168539325842, + "grad_norm": 2.758613348007202, + "learning_rate": 1.1366023588557834e-05, + "loss": 0.4183, + "step": 706 + }, + { + "epoch": 1.58876404494382, + "grad_norm": 0.3631807267665863, + "learning_rate": 1.132129653816903e-05, + "loss": 0.0245, + "step": 707 + }, + { + "epoch": 1.5910112359550563, + "grad_norm": 2.9882872104644775, + "learning_rate": 1.1276757920465702e-05, + "loss": 0.4834, + "step": 708 + }, + { + "epoch": 1.5932584269662922, + "grad_norm": 3.5191538333892822, + "learning_rate": 1.1232408602561024e-05, + "loss": 0.5815, + "step": 709 + }, + { + "epoch": 1.595505617977528, + "grad_norm": 3.9756875038146973, + "learning_rate": 1.1188249447882736e-05, + "loss": 0.7791, + "step": 710 + }, + { + "epoch": 1.597752808988764, + "grad_norm": 3.263510227203369, + "learning_rate": 1.114428131615634e-05, + "loss": 0.4835, + "step": 711 + }, + { + "epoch": 1.6, + "grad_norm": 3.1563074588775635, + "learning_rate": 1.1100505063388333e-05, + "loss": 0.5797, + "step": 712 + }, + { + "epoch": 1.602247191011236, + "grad_norm": 2.0562615394592285, + "learning_rate": 1.1056921541849578e-05, + "loss": 0.1891, + "step": 713 + }, + { + "epoch": 1.604494382022472, + "grad_norm": 2.464853286743164, + "learning_rate": 1.1013531600058679e-05, + "loss": 0.3955, + "step": 714 + }, + { + "epoch": 1.606741573033708, + "grad_norm": 3.1884946823120117, + "learning_rate": 1.0970336082765479e-05, + "loss": 0.497, + "step": 715 + }, + { + "epoch": 1.6089887640449438, + "grad_norm": 2.6583571434020996, + "learning_rate": 1.092733583093462e-05, + "loss": 0.6271, + "step": 716 + }, + { + "epoch": 1.6112359550561797, + "grad_norm": 3.7117207050323486, + "learning_rate": 1.0884531681729152e-05, + "loss": 0.5571, + "step": 717 + }, + { + "epoch": 1.6134831460674157, + "grad_norm": 0.321676641702652, + "learning_rate": 1.0841924468494243e-05, + "loss": 0.0405, + "step": 718 + }, + { + "epoch": 1.6157303370786518, + "grad_norm": 2.33481764793396, + "learning_rate": 1.0799515020740955e-05, + "loss": 0.2968, + "step": 719 + }, + { + "epoch": 1.6179775280898876, + "grad_norm": 2.9772684574127197, + "learning_rate": 1.0757304164130105e-05, + "loss": 0.7262, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_VitaminC_cosine_accuracy": 0.560546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8018198609352112, + "eval_VitaminC_cosine_ap": 0.5551830056983754, + "eval_VitaminC_cosine_f1": 0.6657534246575343, + "eval_VitaminC_cosine_f1_threshold": 0.4161919951438904, + "eval_VitaminC_cosine_precision": 0.5073068893528184, + "eval_VitaminC_cosine_recall": 0.9681274900398407, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 320.4762268066406, + "eval_VitaminC_dot_ap": 0.5305147029680797, + "eval_VitaminC_dot_f1": 0.6693766937669378, + "eval_VitaminC_dot_f1_threshold": 142.318115234375, + "eval_VitaminC_dot_precision": 0.5071868583162218, + "eval_VitaminC_dot_recall": 0.9840637450199203, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 15.623578071594238, + "eval_VitaminC_euclidean_ap": 0.5552396175565026, + "eval_VitaminC_euclidean_f1": 0.6649006622516557, + "eval_VitaminC_euclidean_f1_threshold": 24.57459259033203, + "eval_VitaminC_euclidean_precision": 0.498015873015873, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 233.19009399414062, + "eval_VitaminC_manhattan_ap": 0.5548444881216961, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 521.407470703125, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 320.4762268066406, + "eval_VitaminC_max_ap": 0.5552396175565026, + "eval_VitaminC_max_f1": 0.6693766937669378, + "eval_VitaminC_max_f1_threshold": 521.407470703125, + "eval_VitaminC_max_precision": 0.5073068893528184, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5552396175565026, + "eval_sts-test_pearson_cosine": 0.887808686322094, + "eval_sts-test_pearson_dot": 0.8803917434966819, + "eval_sts-test_pearson_euclidean": 0.9086887931212514, + "eval_sts-test_pearson_manhattan": 0.9097294718375346, + "eval_sts-test_pearson_max": 0.9097294718375346, + "eval_sts-test_spearman_cosine": 0.9090755128594795, + "eval_sts-test_spearman_dot": 0.8816835342693723, + "eval_sts-test_spearman_euclidean": 0.9045753260139605, + "eval_sts-test_spearman_manhattan": 0.9058602689822915, + "eval_sts-test_spearman_max": 0.9090755128594795, + "eval_vitaminc-pairs_loss": 1.3776708841323853, + "eval_vitaminc-pairs_runtime": 1.9, + "eval_vitaminc-pairs_samples_per_second": 56.841, + "eval_vitaminc-pairs_steps_per_second": 1.053, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_negation-triplets_loss": 0.9084223508834839, + "eval_negation-triplets_runtime": 0.3036, + "eval_negation-triplets_samples_per_second": 210.795, + "eval_negation-triplets_steps_per_second": 3.294, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_scitail-pairs-pos_loss": 0.1010468602180481, + "eval_scitail-pairs-pos_runtime": 0.4089, + "eval_scitail-pairs-pos_samples_per_second": 132.069, + "eval_scitail-pairs-pos_steps_per_second": 2.446, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_scitail-pairs-qa_loss": 0.0006560595356859267, + "eval_scitail-pairs-qa_runtime": 0.5374, + "eval_scitail-pairs-qa_samples_per_second": 238.165, + "eval_scitail-pairs-qa_steps_per_second": 3.721, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_xsum-pairs_loss": 0.01978565938770771, + "eval_xsum-pairs_runtime": 2.753, + "eval_xsum-pairs_samples_per_second": 46.495, + "eval_xsum-pairs_steps_per_second": 0.726, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_sciq_pairs_loss": 0.016458621248602867, + "eval_sciq_pairs_runtime": 2.8893, + "eval_sciq_pairs_samples_per_second": 44.301, + "eval_sciq_pairs_steps_per_second": 0.692, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_qasc_pairs_loss": 0.09319126605987549, + "eval_qasc_pairs_runtime": 0.6588, + "eval_qasc_pairs_samples_per_second": 194.3, + "eval_qasc_pairs_steps_per_second": 3.036, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_openbookqa_pairs_loss": 0.7171750068664551, + "eval_openbookqa_pairs_runtime": 0.5942, + "eval_openbookqa_pairs_samples_per_second": 215.414, + "eval_openbookqa_pairs_steps_per_second": 3.366, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_msmarco_pairs_loss": 0.14239048957824707, + "eval_msmarco_pairs_runtime": 1.5034, + "eval_msmarco_pairs_samples_per_second": 85.141, + "eval_msmarco_pairs_steps_per_second": 1.33, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_nq_pairs_loss": 0.0960390493273735, + "eval_nq_pairs_runtime": 2.3578, + "eval_nq_pairs_samples_per_second": 54.288, + "eval_nq_pairs_steps_per_second": 0.848, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_trivia_pairs_loss": 0.49868205189704895, + "eval_trivia_pairs_runtime": 3.591, + "eval_trivia_pairs_samples_per_second": 35.645, + "eval_trivia_pairs_steps_per_second": 0.557, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_gooaq_pairs_loss": 0.27023670077323914, + "eval_gooaq_pairs_runtime": 0.9169, + "eval_gooaq_pairs_samples_per_second": 139.603, + "eval_gooaq_pairs_steps_per_second": 2.181, + "step": 720 + }, + { + "epoch": 1.6179775280898876, + "eval_paws-pos_loss": 0.024076081812381744, + "eval_paws-pos_runtime": 0.7075, + "eval_paws-pos_samples_per_second": 180.921, + "eval_paws-pos_steps_per_second": 2.827, + "step": 720 + }, + { + "epoch": 1.6202247191011236, + "grad_norm": 4.036292552947998, + "learning_rate": 1.0715292720456164e-05, + "loss": 0.7611, + "step": 721 + }, + { + "epoch": 1.6224719101123597, + "grad_norm": 2.709695339202881, + "learning_rate": 1.0673481507631287e-05, + "loss": 0.3926, + "step": 722 + }, + { + "epoch": 1.6247191011235955, + "grad_norm": 5.689388751983643, + "learning_rate": 1.0631871339669366e-05, + "loss": 2.3127, + "step": 723 + }, + { + "epoch": 1.6269662921348313, + "grad_norm": 2.782240152359009, + "learning_rate": 1.05904630266702e-05, + "loss": 0.7026, + "step": 724 + }, + { + "epoch": 1.6292134831460674, + "grad_norm": 0.44289153814315796, + "learning_rate": 1.0549257374803709e-05, + "loss": 0.0685, + "step": 725 + }, + { + "epoch": 1.6314606741573034, + "grad_norm": 3.638007879257202, + "learning_rate": 1.0508255186294249e-05, + "loss": 0.6031, + "step": 726 + }, + { + "epoch": 1.6337078651685393, + "grad_norm": 3.3411712646484375, + "learning_rate": 1.0467457259404982e-05, + "loss": 0.579, + "step": 727 + }, + { + "epoch": 1.6359550561797753, + "grad_norm": 1.792189121246338, + "learning_rate": 1.0426864388422359e-05, + "loss": 0.1705, + "step": 728 + }, + { + "epoch": 1.6382022471910114, + "grad_norm": 0.44782668352127075, + "learning_rate": 1.0386477363640624e-05, + "loss": 0.0591, + "step": 729 + }, + { + "epoch": 1.6404494382022472, + "grad_norm": 5.771501064300537, + "learning_rate": 1.0346296971346449e-05, + "loss": 2.1115, + "step": 730 + }, + { + "epoch": 1.642696629213483, + "grad_norm": 3.1382172107696533, + "learning_rate": 1.030632399380362e-05, + "loss": 0.4871, + "step": 731 + }, + { + "epoch": 1.644943820224719, + "grad_norm": 2.9366180896759033, + "learning_rate": 1.0266559209237823e-05, + "loss": 0.4263, + "step": 732 + }, + { + "epoch": 1.6471910112359551, + "grad_norm": 0.3291671574115753, + "learning_rate": 1.022700339182144e-05, + "loss": 0.0484, + "step": 733 + }, + { + "epoch": 1.649438202247191, + "grad_norm": 3.614391565322876, + "learning_rate": 1.0187657311658554e-05, + "loss": 0.5249, + "step": 734 + }, + { + "epoch": 1.651685393258427, + "grad_norm": 2.7633137702941895, + "learning_rate": 1.0148521734769896e-05, + "loss": 0.3998, + "step": 735 + }, + { + "epoch": 1.653932584269663, + "grad_norm": 3.61558198928833, + "learning_rate": 1.0109597423077964e-05, + "loss": 0.7226, + "step": 736 + }, + { + "epoch": 1.6561797752808989, + "grad_norm": 3.115598440170288, + "learning_rate": 1.0070885134392154e-05, + "loss": 0.4494, + "step": 737 + }, + { + "epoch": 1.6584269662921347, + "grad_norm": 0.40766555070877075, + "learning_rate": 1.0032385622394063e-05, + "loss": 0.0537, + "step": 738 + }, + { + "epoch": 1.6606741573033708, + "grad_norm": 3.0065345764160156, + "learning_rate": 9.994099636622759e-06, + "loss": 0.7129, + "step": 739 + }, + { + "epoch": 1.6629213483146068, + "grad_norm": 3.3741817474365234, + "learning_rate": 9.956027922460216e-06, + "loss": 0.6079, + "step": 740 + }, + { + "epoch": 1.6651685393258426, + "grad_norm": 3.619709014892578, + "learning_rate": 9.918171221116802e-06, + "loss": 0.6688, + "step": 741 + }, + { + "epoch": 1.6674157303370787, + "grad_norm": 2.7903146743774414, + "learning_rate": 9.880530269616847e-06, + "loss": 0.567, + "step": 742 + }, + { + "epoch": 1.6696629213483147, + "grad_norm": 3.1688435077667236, + "learning_rate": 9.843105800784284e-06, + "loss": 0.5196, + "step": 743 + }, + { + "epoch": 1.6719101123595506, + "grad_norm": 3.0614845752716064, + "learning_rate": 9.805898543228392e-06, + "loss": 0.5081, + "step": 744 + }, + { + "epoch": 1.6741573033707864, + "grad_norm": 3.2361667156219482, + "learning_rate": 9.768909221329611e-06, + "loss": 0.5413, + "step": 745 + }, + { + "epoch": 1.6764044943820224, + "grad_norm": 3.0333893299102783, + "learning_rate": 9.732138555225442e-06, + "loss": 0.4741, + "step": 746 + }, + { + "epoch": 1.6786516853932585, + "grad_norm": 0.5086472630500793, + "learning_rate": 9.6955872607964e-06, + "loss": 0.0289, + "step": 747 + }, + { + "epoch": 1.6808988764044943, + "grad_norm": 2.052063465118408, + "learning_rate": 9.65925604965213e-06, + "loss": 0.1956, + "step": 748 + }, + { + "epoch": 1.6831460674157304, + "grad_norm": 2.2275161743164062, + "learning_rate": 9.623145629117488e-06, + "loss": 0.1967, + "step": 749 + }, + { + "epoch": 1.6853932584269664, + "grad_norm": 3.2939271926879883, + "learning_rate": 9.587256702218833e-06, + "loss": 0.6488, + "step": 750 + }, + { + "epoch": 1.6876404494382022, + "grad_norm": 2.664923906326294, + "learning_rate": 9.551589967670282e-06, + "loss": 0.7052, + "step": 751 + }, + { + "epoch": 1.689887640449438, + "grad_norm": 1.887410283088684, + "learning_rate": 9.516146119860157e-06, + "loss": 0.1807, + "step": 752 + }, + { + "epoch": 1.6921348314606741, + "grad_norm": 3.4962358474731445, + "learning_rate": 9.480925848837433e-06, + "loss": 0.6238, + "step": 753 + }, + { + "epoch": 1.6943820224719102, + "grad_norm": 3.2788219451904297, + "learning_rate": 9.445929840298317e-06, + "loss": 0.6328, + "step": 754 + }, + { + "epoch": 1.696629213483146, + "grad_norm": 3.1477396488189697, + "learning_rate": 9.411158775572893e-06, + "loss": 0.4677, + "step": 755 + }, + { + "epoch": 1.698876404494382, + "grad_norm": 2.82737135887146, + "learning_rate": 9.376613331611867e-06, + "loss": 0.44, + "step": 756 + }, + { + "epoch": 1.701123595505618, + "grad_norm": 3.121051549911499, + "learning_rate": 9.342294180973379e-06, + "loss": 0.5382, + "step": 757 + }, + { + "epoch": 1.703370786516854, + "grad_norm": 3.4081382751464844, + "learning_rate": 9.308201991809902e-06, + "loss": 0.6094, + "step": 758 + }, + { + "epoch": 1.7056179775280897, + "grad_norm": 0.4050018787384033, + "learning_rate": 9.274337427855252e-06, + "loss": 0.0262, + "step": 759 + }, + { + "epoch": 1.7078651685393258, + "grad_norm": 2.0232627391815186, + "learning_rate": 9.240701148411655e-06, + "loss": 0.1995, + "step": 760 + }, + { + "epoch": 1.7101123595505618, + "grad_norm": 2.8510541915893555, + "learning_rate": 9.207293808336911e-06, + "loss": 0.6595, + "step": 761 + }, + { + "epoch": 1.7123595505617977, + "grad_norm": 2.99471116065979, + "learning_rate": 9.174116058031651e-06, + "loss": 0.4056, + "step": 762 + }, + { + "epoch": 1.7146067415730337, + "grad_norm": 3.1772732734680176, + "learning_rate": 9.14116854342666e-06, + "loss": 0.4836, + "step": 763 + }, + { + "epoch": 1.7168539325842698, + "grad_norm": 3.142526626586914, + "learning_rate": 9.108451905970327e-06, + "loss": 0.5474, + "step": 764 + }, + { + "epoch": 1.7191011235955056, + "grad_norm": 2.5622060298919678, + "learning_rate": 9.07596678261612e-06, + "loss": 0.6019, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_VitaminC_cosine_accuracy": 0.5546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8168195486068726, + "eval_VitaminC_cosine_ap": 0.5542265425878483, + "eval_VitaminC_cosine_f1": 0.6666666666666667, + "eval_VitaminC_cosine_f1_threshold": 0.4378119707107544, + "eval_VitaminC_cosine_precision": 0.5083682008368201, + "eval_VitaminC_cosine_recall": 0.9681274900398407, + "eval_VitaminC_dot_accuracy": 0.548828125, + "eval_VitaminC_dot_accuracy_threshold": 316.8749694824219, + "eval_VitaminC_dot_ap": 0.5313491176773233, + "eval_VitaminC_dot_f1": 0.6693657219973009, + "eval_VitaminC_dot_f1_threshold": 142.36911010742188, + "eval_VitaminC_dot_precision": 0.5061224489795918, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 14.925283432006836, + "eval_VitaminC_euclidean_ap": 0.5567864372707304, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 24.09148406982422, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 227.16958618164062, + "eval_VitaminC_manhattan_ap": 0.5581138503752323, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 510.7401123046875, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 316.8749694824219, + "eval_VitaminC_max_ap": 0.5581138503752323, + "eval_VitaminC_max_f1": 0.6693657219973009, + "eval_VitaminC_max_f1_threshold": 510.7401123046875, + "eval_VitaminC_max_precision": 0.5083682008368201, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5581138503752323, + "eval_sts-test_pearson_cosine": 0.8872001916630274, + "eval_sts-test_pearson_dot": 0.8772620948169378, + "eval_sts-test_pearson_euclidean": 0.9089212280249863, + "eval_sts-test_pearson_manhattan": 0.9095742817856052, + "eval_sts-test_pearson_max": 0.9095742817856052, + "eval_sts-test_spearman_cosine": 0.9093684546165476, + "eval_sts-test_spearman_dot": 0.8787381852496359, + "eval_sts-test_spearman_euclidean": 0.9049343206328422, + "eval_sts-test_spearman_manhattan": 0.9058068359761904, + "eval_sts-test_spearman_max": 0.9093684546165476, + "eval_vitaminc-pairs_loss": 1.400781512260437, + "eval_vitaminc-pairs_runtime": 1.884, + "eval_vitaminc-pairs_samples_per_second": 57.326, + "eval_vitaminc-pairs_steps_per_second": 1.062, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_negation-triplets_loss": 0.9053038954734802, + "eval_negation-triplets_runtime": 0.3005, + "eval_negation-triplets_samples_per_second": 212.958, + "eval_negation-triplets_steps_per_second": 3.327, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_scitail-pairs-pos_loss": 0.1003662496805191, + "eval_scitail-pairs-pos_runtime": 0.3961, + "eval_scitail-pairs-pos_samples_per_second": 136.314, + "eval_scitail-pairs-pos_steps_per_second": 2.524, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_scitail-pairs-qa_loss": 0.0006318774539977312, + "eval_scitail-pairs-qa_runtime": 0.5296, + "eval_scitail-pairs-qa_samples_per_second": 241.684, + "eval_scitail-pairs-qa_steps_per_second": 3.776, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_xsum-pairs_loss": 0.021298767998814583, + "eval_xsum-pairs_runtime": 2.7366, + "eval_xsum-pairs_samples_per_second": 46.773, + "eval_xsum-pairs_steps_per_second": 0.731, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_sciq_pairs_loss": 0.015965810045599937, + "eval_sciq_pairs_runtime": 2.8798, + "eval_sciq_pairs_samples_per_second": 44.448, + "eval_sciq_pairs_steps_per_second": 0.695, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_qasc_pairs_loss": 0.09514283388853073, + "eval_qasc_pairs_runtime": 0.659, + "eval_qasc_pairs_samples_per_second": 194.226, + "eval_qasc_pairs_steps_per_second": 3.035, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_openbookqa_pairs_loss": 0.7150779366493225, + "eval_openbookqa_pairs_runtime": 0.5886, + "eval_openbookqa_pairs_samples_per_second": 217.448, + "eval_openbookqa_pairs_steps_per_second": 3.398, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_msmarco_pairs_loss": 0.13628047704696655, + "eval_msmarco_pairs_runtime": 1.4926, + "eval_msmarco_pairs_samples_per_second": 85.759, + "eval_msmarco_pairs_steps_per_second": 1.34, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_nq_pairs_loss": 0.09483325481414795, + "eval_nq_pairs_runtime": 2.3544, + "eval_nq_pairs_samples_per_second": 54.365, + "eval_nq_pairs_steps_per_second": 0.849, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_trivia_pairs_loss": 0.5036953091621399, + "eval_trivia_pairs_runtime": 3.5852, + "eval_trivia_pairs_samples_per_second": 35.702, + "eval_trivia_pairs_steps_per_second": 0.558, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_gooaq_pairs_loss": 0.25342443585395813, + "eval_gooaq_pairs_runtime": 0.9151, + "eval_gooaq_pairs_samples_per_second": 139.873, + "eval_gooaq_pairs_steps_per_second": 2.186, + "step": 765 + }, + { + "epoch": 1.7191011235955056, + "eval_paws-pos_loss": 0.02396133542060852, + "eval_paws-pos_runtime": 0.6961, + "eval_paws-pos_samples_per_second": 183.884, + "eval_paws-pos_steps_per_second": 2.873, + "step": 765 + }, + { + "epoch": 1.7213483146067414, + "grad_norm": 1.7996478080749512, + "learning_rate": 9.04371380581023e-06, + "loss": 0.1824, + "step": 766 + }, + { + "epoch": 1.7235955056179775, + "grad_norm": 2.8171517848968506, + "learning_rate": 9.011693603479218e-06, + "loss": 0.6398, + "step": 767 + }, + { + "epoch": 1.7258426966292135, + "grad_norm": 1.8335927724838257, + "learning_rate": 8.979906799017817e-06, + "loss": 0.1518, + "step": 768 + }, + { + "epoch": 1.7280898876404494, + "grad_norm": 3.8396594524383545, + "learning_rate": 8.948354011276773e-06, + "loss": 0.7804, + "step": 769 + }, + { + "epoch": 1.7303370786516854, + "grad_norm": 2.0686426162719727, + "learning_rate": 8.91703585455082e-06, + "loss": 0.2294, + "step": 770 + }, + { + "epoch": 1.7325842696629215, + "grad_norm": 3.792858123779297, + "learning_rate": 8.885952938566709e-06, + "loss": 0.719, + "step": 771 + }, + { + "epoch": 1.7348314606741573, + "grad_norm": 3.530104398727417, + "learning_rate": 8.855105868471325e-06, + "loss": 0.61, + "step": 772 + }, + { + "epoch": 1.737078651685393, + "grad_norm": 3.1915283203125, + "learning_rate": 8.82449524481993e-06, + "loss": 0.5865, + "step": 773 + }, + { + "epoch": 1.7393258426966294, + "grad_norm": 2.984779119491577, + "learning_rate": 8.794121663564459e-06, + "loss": 0.4411, + "step": 774 + }, + { + "epoch": 1.7415730337078652, + "grad_norm": 3.3878366947174072, + "learning_rate": 8.763985716041908e-06, + "loss": 0.6174, + "step": 775 + }, + { + "epoch": 1.743820224719101, + "grad_norm": 0.42201510071754456, + "learning_rate": 8.734087988962838e-06, + "loss": 0.0526, + "step": 776 + }, + { + "epoch": 1.746067415730337, + "grad_norm": 3.0642645359039307, + "learning_rate": 8.70442906439994e-06, + "loss": 0.5093, + "step": 777 + }, + { + "epoch": 1.7483146067415731, + "grad_norm": 3.7137510776519775, + "learning_rate": 8.675009519776724e-06, + "loss": 0.6742, + "step": 778 + }, + { + "epoch": 1.750561797752809, + "grad_norm": 0.45391860604286194, + "learning_rate": 8.645829927856232e-06, + "loss": 0.0293, + "step": 779 + }, + { + "epoch": 1.7528089887640448, + "grad_norm": 1.823586344718933, + "learning_rate": 8.616890856729943e-06, + "loss": 0.1776, + "step": 780 + }, + { + "epoch": 1.755056179775281, + "grad_norm": 2.9259328842163086, + "learning_rate": 8.588192869806671e-06, + "loss": 0.6964, + "step": 781 + }, + { + "epoch": 1.7573033707865169, + "grad_norm": 2.0311052799224854, + "learning_rate": 8.559736525801625e-06, + "loss": 0.2044, + "step": 782 + }, + { + "epoch": 1.7595505617977527, + "grad_norm": 3.3475892543792725, + "learning_rate": 8.531522378725498e-06, + "loss": 0.5221, + "step": 783 + }, + { + "epoch": 1.7617977528089888, + "grad_norm": 3.183722496032715, + "learning_rate": 8.503550977873718e-06, + "loss": 0.579, + "step": 784 + }, + { + "epoch": 1.7640449438202248, + "grad_norm": 2.976590394973755, + "learning_rate": 8.47582286781573e-06, + "loss": 0.5887, + "step": 785 + }, + { + "epoch": 1.7662921348314606, + "grad_norm": 2.8340189456939697, + "learning_rate": 8.448338588384402e-06, + "loss": 0.4357, + "step": 786 + }, + { + "epoch": 1.7685393258426965, + "grad_norm": 3.2601733207702637, + "learning_rate": 8.42109867466551e-06, + "loss": 0.5437, + "step": 787 + }, + { + "epoch": 1.7707865168539327, + "grad_norm": 0.4331270754337311, + "learning_rate": 8.394103656987329e-06, + "loss": 0.0326, + "step": 788 + }, + { + "epoch": 1.7730337078651686, + "grad_norm": 3.940329074859619, + "learning_rate": 8.367354060910303e-06, + "loss": 0.7279, + "step": 789 + }, + { + "epoch": 1.7752808988764044, + "grad_norm": 2.0091605186462402, + "learning_rate": 8.340850407216812e-06, + "loss": 0.2255, + "step": 790 + }, + { + "epoch": 1.7775280898876404, + "grad_norm": 3.209312915802002, + "learning_rate": 8.314593211901029e-06, + "loss": 0.5386, + "step": 791 + }, + { + "epoch": 1.7797752808988765, + "grad_norm": 0.31416022777557373, + "learning_rate": 8.288582986158893e-06, + "loss": 0.0218, + "step": 792 + }, + { + "epoch": 1.7820224719101123, + "grad_norm": 0.2632823586463928, + "learning_rate": 8.26282023637813e-06, + "loss": 0.0174, + "step": 793 + }, + { + "epoch": 1.7842696629213484, + "grad_norm": 3.311159610748291, + "learning_rate": 8.237305464128416e-06, + "loss": 0.542, + "step": 794 + }, + { + "epoch": 1.7865168539325844, + "grad_norm": 3.435530662536621, + "learning_rate": 8.212039166151593e-06, + "loss": 0.511, + "step": 795 + }, + { + "epoch": 1.7887640449438202, + "grad_norm": 0.29949724674224854, + "learning_rate": 8.187021834352023e-06, + "loss": 0.0345, + "step": 796 + }, + { + "epoch": 1.791011235955056, + "grad_norm": 2.876715660095215, + "learning_rate": 8.162253955786986e-06, + "loss": 0.6513, + "step": 797 + }, + { + "epoch": 1.7932584269662921, + "grad_norm": 0.1600445955991745, + "learning_rate": 8.137736012657215e-06, + "loss": 0.0069, + "step": 798 + }, + { + "epoch": 1.7955056179775282, + "grad_norm": 0.4229116141796112, + "learning_rate": 8.11346848229749e-06, + "loss": 0.0467, + "step": 799 + }, + { + "epoch": 1.797752808988764, + "grad_norm": 2.8151891231536865, + "learning_rate": 8.089451837167374e-06, + "loss": 0.6994, + "step": 800 + }, + { + "epoch": 1.8, + "grad_norm": 2.6854257583618164, + "learning_rate": 8.065686544841985e-06, + "loss": 0.6583, + "step": 801 + }, + { + "epoch": 1.802247191011236, + "grad_norm": 0.18526576459407806, + "learning_rate": 8.042173068002905e-06, + "loss": 0.0059, + "step": 802 + }, + { + "epoch": 1.804494382022472, + "grad_norm": 2.002289056777954, + "learning_rate": 8.018911864429175e-06, + "loss": 0.1896, + "step": 803 + }, + { + "epoch": 1.8067415730337077, + "grad_norm": 6.600093364715576, + "learning_rate": 7.995903386988378e-06, + "loss": 2.2539, + "step": 804 + }, + { + "epoch": 1.8089887640449438, + "grad_norm": 1.968558669090271, + "learning_rate": 7.97314808362783e-06, + "loss": 0.1933, + "step": 805 + }, + { + "epoch": 1.8112359550561798, + "grad_norm": 3.4320321083068848, + "learning_rate": 7.950646397365845e-06, + "loss": 0.5681, + "step": 806 + }, + { + "epoch": 1.8134831460674157, + "grad_norm": 1.6512911319732666, + "learning_rate": 7.928398766283123e-06, + "loss": 0.1692, + "step": 807 + }, + { + "epoch": 1.8157303370786517, + "grad_norm": 3.648367166519165, + "learning_rate": 7.90640562351421e-06, + "loss": 0.6595, + "step": 808 + }, + { + "epoch": 1.8179775280898878, + "grad_norm": 1.6848154067993164, + "learning_rate": 7.884667397239081e-06, + "loss": 0.1603, + "step": 809 + }, + { + "epoch": 1.8202247191011236, + "grad_norm": 2.91007661819458, + "learning_rate": 7.863184510674787e-06, + "loss": 0.6671, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.7359669208526611, + "eval_VitaminC_cosine_ap": 0.5545359684929722, + "eval_VitaminC_cosine_f1": 0.6684782608695652, + "eval_VitaminC_cosine_f1_threshold": 0.3982100784778595, + "eval_VitaminC_cosine_precision": 0.5072164948453608, + "eval_VitaminC_cosine_recall": 0.9800796812749004, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 320.06512451171875, + "eval_VitaminC_dot_ap": 0.5338853081563194, + "eval_VitaminC_dot_f1": 0.6711772665764546, + "eval_VitaminC_dot_f1_threshold": 138.87033081054688, + "eval_VitaminC_dot_precision": 0.5081967213114754, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 15.56546401977539, + "eval_VitaminC_euclidean_ap": 0.5573421124721921, + "eval_VitaminC_euclidean_f1": 0.6657789613848203, + "eval_VitaminC_euclidean_f1_threshold": 23.108264923095703, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 0.9960159362549801, + "eval_VitaminC_manhattan_accuracy": 0.55859375, + "eval_VitaminC_manhattan_accuracy_threshold": 242.0438232421875, + "eval_VitaminC_manhattan_ap": 0.5585886377626182, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 522.1114501953125, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 320.06512451171875, + "eval_VitaminC_max_ap": 0.5585886377626182, + "eval_VitaminC_max_f1": 0.6711772665764546, + "eval_VitaminC_max_f1_threshold": 522.1114501953125, + "eval_VitaminC_max_precision": 0.5081967213114754, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5585886377626182, + "eval_sts-test_pearson_cosine": 0.8862376792316814, + "eval_sts-test_pearson_dot": 0.8772674400865361, + "eval_sts-test_pearson_euclidean": 0.9079859903622429, + "eval_sts-test_pearson_manhattan": 0.9085935531378193, + "eval_sts-test_pearson_max": 0.9085935531378193, + "eval_sts-test_spearman_cosine": 0.908348260110608, + "eval_sts-test_spearman_dot": 0.8802999595209344, + "eval_sts-test_spearman_euclidean": 0.9040154429826927, + "eval_sts-test_spearman_manhattan": 0.9042455092175059, + "eval_sts-test_spearman_max": 0.908348260110608, + "eval_vitaminc-pairs_loss": 1.3900337219238281, + "eval_vitaminc-pairs_runtime": 1.8861, + "eval_vitaminc-pairs_samples_per_second": 57.262, + "eval_vitaminc-pairs_steps_per_second": 1.06, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_negation-triplets_loss": 0.8989883065223694, + "eval_negation-triplets_runtime": 0.2978, + "eval_negation-triplets_samples_per_second": 214.878, + "eval_negation-triplets_steps_per_second": 3.357, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_scitail-pairs-pos_loss": 0.10765840113162994, + "eval_scitail-pairs-pos_runtime": 0.3829, + "eval_scitail-pairs-pos_samples_per_second": 141.019, + "eval_scitail-pairs-pos_steps_per_second": 2.611, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_scitail-pairs-qa_loss": 0.0005394439795054495, + "eval_scitail-pairs-qa_runtime": 0.5255, + "eval_scitail-pairs-qa_samples_per_second": 243.591, + "eval_scitail-pairs-qa_steps_per_second": 3.806, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_xsum-pairs_loss": 0.020049653947353363, + "eval_xsum-pairs_runtime": 2.7348, + "eval_xsum-pairs_samples_per_second": 46.805, + "eval_xsum-pairs_steps_per_second": 0.731, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_sciq_pairs_loss": 0.01575099490582943, + "eval_sciq_pairs_runtime": 2.8287, + "eval_sciq_pairs_samples_per_second": 45.251, + "eval_sciq_pairs_steps_per_second": 0.707, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_qasc_pairs_loss": 0.09813623130321503, + "eval_qasc_pairs_runtime": 0.6548, + "eval_qasc_pairs_samples_per_second": 195.48, + "eval_qasc_pairs_steps_per_second": 3.054, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_openbookqa_pairs_loss": 0.6886358857154846, + "eval_openbookqa_pairs_runtime": 0.5819, + "eval_openbookqa_pairs_samples_per_second": 219.955, + "eval_openbookqa_pairs_steps_per_second": 3.437, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_msmarco_pairs_loss": 0.12771743535995483, + "eval_msmarco_pairs_runtime": 1.4904, + "eval_msmarco_pairs_samples_per_second": 85.884, + "eval_msmarco_pairs_steps_per_second": 1.342, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_nq_pairs_loss": 0.08647548407316208, + "eval_nq_pairs_runtime": 2.3498, + "eval_nq_pairs_samples_per_second": 54.472, + "eval_nq_pairs_steps_per_second": 0.851, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_trivia_pairs_loss": 0.49437975883483887, + "eval_trivia_pairs_runtime": 3.595, + "eval_trivia_pairs_samples_per_second": 35.605, + "eval_trivia_pairs_steps_per_second": 0.556, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_gooaq_pairs_loss": 0.2529779374599457, + "eval_gooaq_pairs_runtime": 0.9117, + "eval_gooaq_pairs_samples_per_second": 140.4, + "eval_gooaq_pairs_steps_per_second": 2.194, + "step": 810 + }, + { + "epoch": 1.8202247191011236, + "eval_paws-pos_loss": 0.024454889819025993, + "eval_paws-pos_runtime": 0.6908, + "eval_paws-pos_samples_per_second": 185.289, + "eval_paws-pos_steps_per_second": 2.895, + "step": 810 + }, + { + "epoch": 1.8224719101123594, + "grad_norm": 1.7971607446670532, + "learning_rate": 7.841957382067224e-06, + "loss": 0.1995, + "step": 811 + }, + { + "epoch": 1.8247191011235955, + "grad_norm": 3.942342519760132, + "learning_rate": 7.820986424682986e-06, + "loss": 0.5579, + "step": 812 + }, + { + "epoch": 1.8269662921348315, + "grad_norm": 2.735011577606201, + "learning_rate": 7.800272046801332e-06, + "loss": 0.3833, + "step": 813 + }, + { + "epoch": 1.8292134831460674, + "grad_norm": 3.4616143703460693, + "learning_rate": 7.779814651706219e-06, + "loss": 0.6411, + "step": 814 + }, + { + "epoch": 1.8314606741573034, + "grad_norm": 2.609600782394409, + "learning_rate": 7.75961463767846e-06, + "loss": 0.6034, + "step": 815 + }, + { + "epoch": 1.8337078651685395, + "grad_norm": 3.4293768405914307, + "learning_rate": 7.73967239798797e-06, + "loss": 0.5206, + "step": 816 + }, + { + "epoch": 1.8359550561797753, + "grad_norm": 2.578787088394165, + "learning_rate": 7.719988320886112e-06, + "loss": 0.5941, + "step": 817 + }, + { + "epoch": 1.838202247191011, + "grad_norm": 2.0583581924438477, + "learning_rate": 7.700562789598128e-06, + "loss": 0.2062, + "step": 818 + }, + { + "epoch": 1.8404494382022472, + "grad_norm": 3.2634506225585938, + "learning_rate": 7.68139618231569e-06, + "loss": 0.6086, + "step": 819 + }, + { + "epoch": 1.8426966292134832, + "grad_norm": 0.3572952151298523, + "learning_rate": 7.662488872189526e-06, + "loss": 0.037, + "step": 820 + }, + { + "epoch": 1.844943820224719, + "grad_norm": 2.8284902572631836, + "learning_rate": 7.643841227322173e-06, + "loss": 0.6257, + "step": 821 + }, + { + "epoch": 1.847191011235955, + "grad_norm": 2.7638444900512695, + "learning_rate": 7.625453610760782e-06, + "loss": 0.7064, + "step": 822 + }, + { + "epoch": 1.8494382022471911, + "grad_norm": 3.253589391708374, + "learning_rate": 7.60732638049008e-06, + "loss": 0.563, + "step": 823 + }, + { + "epoch": 1.851685393258427, + "grad_norm": 2.748392343521118, + "learning_rate": 7.5894598894253795e-06, + "loss": 0.4359, + "step": 824 + }, + { + "epoch": 1.8539325842696628, + "grad_norm": 0.0, + "learning_rate": 7.571854485405722e-06, + "loss": 0.0, + "step": 825 + }, + { + "epoch": 1.8561797752808988, + "grad_norm": 2.3260743618011475, + "learning_rate": 7.554510511187089e-06, + "loss": 0.233, + "step": 826 + }, + { + "epoch": 1.8584269662921349, + "grad_norm": 0.35933050513267517, + "learning_rate": 7.537428304435747e-06, + "loss": 0.0335, + "step": 827 + }, + { + "epoch": 1.8606741573033707, + "grad_norm": 2.8572092056274414, + "learning_rate": 7.520608197721665e-06, + "loss": 0.6077, + "step": 828 + }, + { + "epoch": 1.8629213483146068, + "grad_norm": 2.002993583679199, + "learning_rate": 7.504050518512034e-06, + "loss": 0.1707, + "step": 829 + }, + { + "epoch": 1.8651685393258428, + "grad_norm": 3.3585002422332764, + "learning_rate": 7.487755589164904e-06, + "loss": 0.5807, + "step": 830 + }, + { + "epoch": 1.8674157303370786, + "grad_norm": 3.4473483562469482, + "learning_rate": 7.471723726922902e-06, + "loss": 0.6566, + "step": 831 + }, + { + "epoch": 1.8696629213483145, + "grad_norm": 2.6746721267700195, + "learning_rate": 7.455955243907055e-06, + "loss": 0.663, + "step": 832 + }, + { + "epoch": 1.8719101123595505, + "grad_norm": 3.2465124130249023, + "learning_rate": 7.44045044711071e-06, + "loss": 0.5896, + "step": 833 + }, + { + "epoch": 1.8741573033707866, + "grad_norm": 3.425652027130127, + "learning_rate": 7.425209638393565e-06, + "loss": 0.5418, + "step": 834 + }, + { + "epoch": 1.8764044943820224, + "grad_norm": 3.13114595413208, + "learning_rate": 7.410233114475789e-06, + "loss": 0.5735, + "step": 835 + }, + { + "epoch": 1.8786516853932584, + "grad_norm": 2.1753461360931396, + "learning_rate": 7.395521166932242e-06, + "loss": 0.2062, + "step": 836 + }, + { + "epoch": 1.8808988764044945, + "grad_norm": 3.1065316200256348, + "learning_rate": 7.381074082186805e-06, + "loss": 0.4343, + "step": 837 + }, + { + "epoch": 1.8831460674157303, + "grad_norm": 0.4167785942554474, + "learning_rate": 7.366892141506793e-06, + "loss": 0.0614, + "step": 838 + }, + { + "epoch": 1.8853932584269661, + "grad_norm": 4.009187698364258, + "learning_rate": 7.352975620997496e-06, + "loss": 0.6301, + "step": 839 + }, + { + "epoch": 1.8876404494382022, + "grad_norm": 2.7548623085021973, + "learning_rate": 7.339324791596779e-06, + "loss": 0.3956, + "step": 840 + }, + { + "epoch": 1.8898876404494382, + "grad_norm": 0.39213764667510986, + "learning_rate": 7.325939919069839e-06, + "loss": 0.0479, + "step": 841 + }, + { + "epoch": 1.892134831460674, + "grad_norm": 1.7549433708190918, + "learning_rate": 7.312821264003997e-06, + "loss": 0.1819, + "step": 842 + }, + { + "epoch": 1.8943820224719101, + "grad_norm": 3.251699209213257, + "learning_rate": 7.299969081803653e-06, + "loss": 0.6005, + "step": 843 + }, + { + "epoch": 1.8966292134831462, + "grad_norm": 3.131671905517578, + "learning_rate": 7.287383622685292e-06, + "loss": 0.452, + "step": 844 + }, + { + "epoch": 1.898876404494382, + "grad_norm": 2.7567901611328125, + "learning_rate": 7.275065131672632e-06, + "loss": 0.4083, + "step": 845 + }, + { + "epoch": 1.9011235955056178, + "grad_norm": 1.7303539514541626, + "learning_rate": 7.263013848591836e-06, + "loss": 0.1702, + "step": 846 + }, + { + "epoch": 1.903370786516854, + "grad_norm": 8.231098175048828, + "learning_rate": 7.251230008066854e-06, + "loss": 0.9503, + "step": 847 + }, + { + "epoch": 1.90561797752809, + "grad_norm": 2.864522933959961, + "learning_rate": 7.239713839514851e-06, + "loss": 0.6427, + "step": 848 + }, + { + "epoch": 1.9078651685393258, + "grad_norm": 0.1522376835346222, + "learning_rate": 7.228465567141745e-06, + "loss": 0.0048, + "step": 849 + }, + { + "epoch": 1.9101123595505618, + "grad_norm": 3.1846060752868652, + "learning_rate": 7.217485409937831e-06, + "loss": 0.4609, + "step": 850 + }, + { + "epoch": 1.9123595505617978, + "grad_norm": 2.612794876098633, + "learning_rate": 7.206773581673535e-06, + "loss": 0.3854, + "step": 851 + }, + { + "epoch": 1.9146067415730337, + "grad_norm": 3.0257163047790527, + "learning_rate": 7.196330290895232e-06, + "loss": 0.4411, + "step": 852 + }, + { + "epoch": 1.9168539325842695, + "grad_norm": 1.7561123371124268, + "learning_rate": 7.186155740921204e-06, + "loss": 0.181, + "step": 853 + }, + { + "epoch": 1.9191011235955058, + "grad_norm": 3.124577522277832, + "learning_rate": 7.176250129837667e-06, + "loss": 0.5846, + "step": 854 + }, + { + "epoch": 1.9213483146067416, + "grad_norm": 2.7159767150878906, + "learning_rate": 7.166613650494926e-06, + "loss": 0.3585, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.737371027469635, + "eval_VitaminC_cosine_ap": 0.5540762464644882, + "eval_VitaminC_cosine_f1": 0.670299727520436, + "eval_VitaminC_cosine_f1_threshold": 0.4220387935638428, + "eval_VitaminC_cosine_precision": 0.5093167701863354, + "eval_VitaminC_cosine_recall": 0.9800796812749004, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 319.16412353515625, + "eval_VitaminC_dot_ap": 0.5341689006460674, + "eval_VitaminC_dot_f1": 0.6720867208672087, + "eval_VitaminC_dot_f1_threshold": 144.9567413330078, + "eval_VitaminC_dot_precision": 0.5092402464065708, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 11.91163444519043, + "eval_VitaminC_euclidean_ap": 0.5565214436993116, + "eval_VitaminC_euclidean_f1": 0.6666666666666667, + "eval_VitaminC_euclidean_f1_threshold": 20.597320556640625, + "eval_VitaminC_euclidean_precision": 0.5083682008368201, + "eval_VitaminC_euclidean_recall": 0.9681274900398407, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 239.43701171875, + "eval_VitaminC_manhattan_ap": 0.555925496941737, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 521.5595703125, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 319.16412353515625, + "eval_VitaminC_max_ap": 0.5565214436993116, + "eval_VitaminC_max_f1": 0.6720867208672087, + "eval_VitaminC_max_f1_threshold": 521.5595703125, + "eval_VitaminC_max_precision": 0.5093167701863354, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5565214436993116, + "eval_sts-test_pearson_cosine": 0.8878150413343597, + "eval_sts-test_pearson_dot": 0.8786014862759446, + "eval_sts-test_pearson_euclidean": 0.9089800587896777, + "eval_sts-test_pearson_manhattan": 0.9094011707715276, + "eval_sts-test_pearson_max": 0.9094011707715276, + "eval_sts-test_spearman_cosine": 0.9084799183216214, + "eval_sts-test_spearman_dot": 0.8800682374894496, + "eval_sts-test_spearman_euclidean": 0.9044720848539306, + "eval_sts-test_spearman_manhattan": 0.905363261473279, + "eval_sts-test_spearman_max": 0.9084799183216214, + "eval_vitaminc-pairs_loss": 1.3997077941894531, + "eval_vitaminc-pairs_runtime": 1.8754, + "eval_vitaminc-pairs_samples_per_second": 57.587, + "eval_vitaminc-pairs_steps_per_second": 1.066, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_negation-triplets_loss": 0.8800344467163086, + "eval_negation-triplets_runtime": 0.2974, + "eval_negation-triplets_samples_per_second": 215.197, + "eval_negation-triplets_steps_per_second": 3.362, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_scitail-pairs-pos_loss": 0.10502482950687408, + "eval_scitail-pairs-pos_runtime": 0.3842, + "eval_scitail-pairs-pos_samples_per_second": 140.569, + "eval_scitail-pairs-pos_steps_per_second": 2.603, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_scitail-pairs-qa_loss": 0.000583611719775945, + "eval_scitail-pairs-qa_runtime": 0.524, + "eval_scitail-pairs-qa_samples_per_second": 244.278, + "eval_scitail-pairs-qa_steps_per_second": 3.817, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_xsum-pairs_loss": 0.019778922200202942, + "eval_xsum-pairs_runtime": 2.7334, + "eval_xsum-pairs_samples_per_second": 46.829, + "eval_xsum-pairs_steps_per_second": 0.732, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_sciq_pairs_loss": 0.016238627955317497, + "eval_sciq_pairs_runtime": 2.8338, + "eval_sciq_pairs_samples_per_second": 45.169, + "eval_sciq_pairs_steps_per_second": 0.706, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_qasc_pairs_loss": 0.0897185355424881, + "eval_qasc_pairs_runtime": 0.6521, + "eval_qasc_pairs_samples_per_second": 196.295, + "eval_qasc_pairs_steps_per_second": 3.067, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_openbookqa_pairs_loss": 0.6814875602722168, + "eval_openbookqa_pairs_runtime": 0.5819, + "eval_openbookqa_pairs_samples_per_second": 219.951, + "eval_openbookqa_pairs_steps_per_second": 3.437, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_msmarco_pairs_loss": 0.1330471634864807, + "eval_msmarco_pairs_runtime": 1.487, + "eval_msmarco_pairs_samples_per_second": 86.082, + "eval_msmarco_pairs_steps_per_second": 1.345, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_nq_pairs_loss": 0.09231739491224289, + "eval_nq_pairs_runtime": 2.3518, + "eval_nq_pairs_samples_per_second": 54.427, + "eval_nq_pairs_steps_per_second": 0.85, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_trivia_pairs_loss": 0.49163100123405457, + "eval_trivia_pairs_runtime": 3.5773, + "eval_trivia_pairs_samples_per_second": 35.781, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_gooaq_pairs_loss": 0.2540152370929718, + "eval_gooaq_pairs_runtime": 0.9247, + "eval_gooaq_pairs_samples_per_second": 138.418, + "eval_gooaq_pairs_steps_per_second": 2.163, + "step": 855 + }, + { + "epoch": 1.9213483146067416, + "eval_paws-pos_loss": 0.024291109293699265, + "eval_paws-pos_runtime": 0.6907, + "eval_paws-pos_samples_per_second": 185.32, + "eval_paws-pos_steps_per_second": 2.896, + "step": 855 + }, + { + "epoch": 1.9235955056179774, + "grad_norm": 2.8243184089660645, + "learning_rate": 7.157246490503611e-06, + "loss": 0.4303, + "step": 856 + }, + { + "epoch": 1.9258426966292135, + "grad_norm": 3.640563726425171, + "learning_rate": 7.148148832231039e-06, + "loss": 0.5627, + "step": 857 + }, + { + "epoch": 1.9280898876404495, + "grad_norm": 2.036550521850586, + "learning_rate": 7.1393208527976385e-06, + "loss": 0.1687, + "step": 858 + }, + { + "epoch": 1.9303370786516854, + "grad_norm": 3.249506950378418, + "learning_rate": 7.130762724073527e-06, + "loss": 0.5509, + "step": 859 + }, + { + "epoch": 1.9325842696629212, + "grad_norm": 1.7619365453720093, + "learning_rate": 7.122474612675153e-06, + "loss": 0.175, + "step": 860 + }, + { + "epoch": 1.9348314606741575, + "grad_norm": 3.532602071762085, + "learning_rate": 7.114456679962048e-06, + "loss": 0.605, + "step": 861 + }, + { + "epoch": 1.9370786516853933, + "grad_norm": 3.2661614418029785, + "learning_rate": 7.106709082033693e-06, + "loss": 0.5085, + "step": 862 + }, + { + "epoch": 1.939325842696629, + "grad_norm": 3.216998815536499, + "learning_rate": 7.09923196972648e-06, + "loss": 0.5059, + "step": 863 + }, + { + "epoch": 1.9415730337078652, + "grad_norm": 3.1379799842834473, + "learning_rate": 7.092025488610771e-06, + "loss": 0.6114, + "step": 864 + }, + { + "epoch": 1.9438202247191012, + "grad_norm": 3.0328826904296875, + "learning_rate": 7.08508977898806e-06, + "loss": 0.5132, + "step": 865 + }, + { + "epoch": 1.946067415730337, + "grad_norm": 2.766942262649536, + "learning_rate": 7.078424975888252e-06, + "loss": 0.4178, + "step": 866 + }, + { + "epoch": 1.948314606741573, + "grad_norm": 3.3092901706695557, + "learning_rate": 7.072031209067025e-06, + "loss": 0.6022, + "step": 867 + }, + { + "epoch": 1.9505617977528091, + "grad_norm": 3.0691306591033936, + "learning_rate": 7.06590860300331e-06, + "loss": 0.5691, + "step": 868 + }, + { + "epoch": 1.952808988764045, + "grad_norm": 2.871619462966919, + "learning_rate": 7.060057276896864e-06, + "loss": 0.7299, + "step": 869 + }, + { + "epoch": 1.9550561797752808, + "grad_norm": 0.3670404255390167, + "learning_rate": 7.054477344665952e-06, + "loss": 0.0441, + "step": 870 + }, + { + "epoch": 1.9573033707865168, + "grad_norm": 3.10213303565979, + "learning_rate": 7.049168914945126e-06, + "loss": 0.5855, + "step": 871 + }, + { + "epoch": 1.9595505617977529, + "grad_norm": 0.7625748515129089, + "learning_rate": 7.044132091083108e-06, + "loss": 0.0151, + "step": 872 + }, + { + "epoch": 1.9617977528089887, + "grad_norm": 1.9246089458465576, + "learning_rate": 7.039366971140788e-06, + "loss": 0.184, + "step": 873 + }, + { + "epoch": 1.9640449438202248, + "grad_norm": 3.3005852699279785, + "learning_rate": 7.034873647889306e-06, + "loss": 0.6185, + "step": 874 + }, + { + "epoch": 1.9662921348314608, + "grad_norm": 2.894439935684204, + "learning_rate": 7.030652208808245e-06, + "loss": 0.6474, + "step": 875 + }, + { + "epoch": 1.9685393258426966, + "grad_norm": 0.026136571541428566, + "learning_rate": 7.026702736083935e-06, + "loss": 0.0005, + "step": 876 + }, + { + "epoch": 1.9707865168539325, + "grad_norm": 3.418996572494507, + "learning_rate": 7.023025306607848e-06, + "loss": 0.6692, + "step": 877 + }, + { + "epoch": 1.9730337078651685, + "grad_norm": 3.3654134273529053, + "learning_rate": 7.019619991975102e-06, + "loss": 0.496, + "step": 878 + }, + { + "epoch": 1.9752808988764046, + "grad_norm": 3.123142957687378, + "learning_rate": 7.016486858483064e-06, + "loss": 0.5654, + "step": 879 + }, + { + "epoch": 1.9775280898876404, + "grad_norm": 3.3420400619506836, + "learning_rate": 7.013625967130067e-06, + "loss": 0.4925, + "step": 880 + }, + { + "epoch": 1.9797752808988764, + "grad_norm": 0.0, + "learning_rate": 7.011037373614215e-06, + "loss": 0.0, + "step": 881 + }, + { + "epoch": 1.9820224719101125, + "grad_norm": 4.277894496917725, + "learning_rate": 7.008721128332304e-06, + "loss": 0.2304, + "step": 882 + }, + { + "epoch": 1.9842696629213483, + "grad_norm": 1.8923455476760864, + "learning_rate": 7.006677276378835e-06, + "loss": 0.1772, + "step": 883 + }, + { + "epoch": 1.9865168539325841, + "grad_norm": 1.7901984453201294, + "learning_rate": 7.00490585754514e-06, + "loss": 0.1804, + "step": 884 + }, + { + "epoch": 1.9887640449438202, + "grad_norm": 0.90578693151474, + "learning_rate": 7.003406906318611e-06, + "loss": 0.0198, + "step": 885 + }, + { + "epoch": 1.9910112359550562, + "grad_norm": 2.6822283267974854, + "learning_rate": 7.002180451882019e-06, + "loss": 0.6703, + "step": 886 + }, + { + "epoch": 1.993258426966292, + "grad_norm": 3.0391242504119873, + "learning_rate": 7.001226518112952e-06, + "loss": 0.1552, + "step": 887 + }, + { + "epoch": 1.9955056179775281, + "grad_norm": 3.118842363357544, + "learning_rate": 7.000545123583352e-06, + "loss": 0.4962, + "step": 888 + }, + { + "epoch": 1.9977528089887642, + "grad_norm": 4.216142177581787, + "learning_rate": 7.000136281559146e-06, + "loss": 0.2099, + "step": 889 + }, + { + "epoch": 2.0, + "grad_norm": 2.8663406372070312, + "learning_rate": 6.999999999999999e-06, + "loss": 0.6554, + "step": 890 + } + ], + "logging_steps": 1, + "max_steps": 890, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 89, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 640, + "trial_name": null, + "trial_params": null +}