diff --git "a/checkpoint-576/trainer_state.json" "b/checkpoint-576/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-576/trainer_state.json" @@ -0,0 +1,6621 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.8, + "eval_steps": 32, + "global_step": 576, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.003125, + "grad_norm": 3.1757984161376953, + "learning_rate": 3.125e-07, + "loss": 0.7374, + "step": 1 + }, + { + "epoch": 0.00625, + "grad_norm": 3.137390375137329, + "learning_rate": 6.25e-07, + "loss": 0.5723, + "step": 2 + }, + { + "epoch": 0.009375, + "grad_norm": 2.765856981277466, + "learning_rate": 9.375000000000001e-07, + "loss": 0.551, + "step": 3 + }, + { + "epoch": 0.0125, + "grad_norm": 3.468062162399292, + "learning_rate": 1.25e-06, + "loss": 0.7379, + "step": 4 + }, + { + "epoch": 0.015625, + "grad_norm": 2.6695668697357178, + "learning_rate": 1.5625e-06, + "loss": 0.5271, + "step": 5 + }, + { + "epoch": 0.01875, + "grad_norm": 2.7720863819122314, + "learning_rate": 1.8750000000000003e-06, + "loss": 0.5858, + "step": 6 + }, + { + "epoch": 0.021875, + "grad_norm": 3.0211267471313477, + "learning_rate": 2.1875000000000002e-06, + "loss": 0.6562, + "step": 7 + }, + { + "epoch": 0.025, + "grad_norm": 3.641108989715576, + "learning_rate": 2.5e-06, + "loss": 0.8228, + "step": 8 + }, + { + "epoch": 0.028125, + "grad_norm": 3.9061200618743896, + "learning_rate": 2.8125e-06, + "loss": 0.9988, + "step": 9 + }, + { + "epoch": 0.03125, + "grad_norm": 2.642423391342163, + "learning_rate": 3.125e-06, + "loss": 0.5582, + "step": 10 + }, + { + "epoch": 0.034375, + "grad_norm": 3.6546943187713623, + "learning_rate": 3.4375e-06, + "loss": 0.8546, + "step": 11 + }, + { + "epoch": 0.0375, + "grad_norm": 2.5504300594329834, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.4235, + "step": 12 + }, + { + "epoch": 0.040625, + "grad_norm": 2.845123529434204, + "learning_rate": 4.0625000000000005e-06, + "loss": 0.6418, + "step": 13 + }, + { + "epoch": 0.04375, + "grad_norm": 2.8562164306640625, + "learning_rate": 4.3750000000000005e-06, + "loss": 0.6577, + "step": 14 + }, + { + "epoch": 0.046875, + "grad_norm": 3.4033620357513428, + "learning_rate": 4.6875000000000004e-06, + "loss": 0.8333, + "step": 15 + }, + { + "epoch": 0.05, + "grad_norm": 2.148242473602295, + "learning_rate": 5e-06, + "loss": 0.4082, + "step": 16 + }, + { + "epoch": 0.053125, + "grad_norm": 3.685960292816162, + "learning_rate": 5.3125e-06, + "loss": 0.8101, + "step": 17 + }, + { + "epoch": 0.05625, + "grad_norm": 2.7071452140808105, + "learning_rate": 5.625e-06, + "loss": 0.5259, + "step": 18 + }, + { + "epoch": 0.059375, + "grad_norm": 3.508561611175537, + "learning_rate": 5.9375e-06, + "loss": 0.9015, + "step": 19 + }, + { + "epoch": 0.0625, + "grad_norm": 4.140976428985596, + "learning_rate": 6.25e-06, + "loss": 1.3915, + "step": 20 + }, + { + "epoch": 0.065625, + "grad_norm": 1.5563820600509644, + "learning_rate": 6.5625e-06, + "loss": 0.26, + "step": 21 + }, + { + "epoch": 0.06875, + "grad_norm": 3.1467344760894775, + "learning_rate": 6.875e-06, + "loss": 0.6885, + "step": 22 + }, + { + "epoch": 0.071875, + "grad_norm": 3.539327383041382, + "learning_rate": 7.1875e-06, + "loss": 0.9357, + "step": 23 + }, + { + "epoch": 0.075, + "grad_norm": 3.1691510677337646, + "learning_rate": 7.500000000000001e-06, + "loss": 0.7168, + "step": 24 + }, + { + "epoch": 0.078125, + "grad_norm": 3.9020121097564697, + "learning_rate": 7.8125e-06, + "loss": 0.8678, + "step": 25 + }, + { + "epoch": 0.08125, + "grad_norm": 2.3635435104370117, + "learning_rate": 8.125000000000001e-06, + "loss": 0.4922, + "step": 26 + }, + { + "epoch": 0.084375, + "grad_norm": 2.5170037746429443, + "learning_rate": 8.4375e-06, + "loss": 0.4937, + "step": 27 + }, + { + "epoch": 0.0875, + "grad_norm": 2.7988407611846924, + "learning_rate": 8.750000000000001e-06, + "loss": 0.5891, + "step": 28 + }, + { + "epoch": 0.090625, + "grad_norm": 2.99135160446167, + "learning_rate": 9.0625e-06, + "loss": 0.6921, + "step": 29 + }, + { + "epoch": 0.09375, + "grad_norm": 3.098013162612915, + "learning_rate": 9.375000000000001e-06, + "loss": 0.8087, + "step": 30 + }, + { + "epoch": 0.096875, + "grad_norm": 3.358091115951538, + "learning_rate": 9.6875e-06, + "loss": 0.805, + "step": 31 + }, + { + "epoch": 0.1, + "grad_norm": 3.0206046104431152, + "learning_rate": 1e-05, + "loss": 0.6141, + "step": 32 + }, + { + "epoch": 0.1, + "eval_VitaminC_cosine_accuracy": 0.5546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8487042188644409, + "eval_VitaminC_cosine_ap": 0.5467207830251657, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.2510407269001007, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 318.7947082519531, + "eval_VitaminC_dot_ap": 0.5360598625078122, + "eval_VitaminC_dot_f1": 0.6657824933687002, + "eval_VitaminC_dot_f1_threshold": 98.82717895507812, + "eval_VitaminC_dot_precision": 0.4990059642147117, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.552734375, + "eval_VitaminC_euclidean_accuracy_threshold": 15.370981216430664, + "eval_VitaminC_euclidean_ap": 0.54465834495355, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 24.364877700805664, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 273.6689758300781, + "eval_VitaminC_manhattan_ap": 0.5450408710915566, + "eval_VitaminC_manhattan_f1": 0.6675531914893617, + "eval_VitaminC_manhattan_f1_threshold": 502.82244873046875, + "eval_VitaminC_manhattan_precision": 0.500998003992016, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.5546875, + "eval_VitaminC_max_accuracy_threshold": 318.7947082519531, + "eval_VitaminC_max_ap": 0.5467207830251657, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 502.82244873046875, + "eval_VitaminC_max_precision": 0.500998003992016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5467207830251657, + "eval_sts-test_pearson_cosine": 0.8677868917853514, + "eval_sts-test_pearson_dot": 0.8601917125112223, + "eval_sts-test_pearson_euclidean": 0.889472619726378, + "eval_sts-test_pearson_manhattan": 0.890143281884324, + "eval_sts-test_pearson_max": 0.890143281884324, + "eval_sts-test_spearman_cosine": 0.8954519734959775, + "eval_sts-test_spearman_dot": 0.8621348855070287, + "eval_sts-test_spearman_euclidean": 0.8880001748147683, + "eval_sts-test_spearman_manhattan": 0.8870461226731652, + "eval_sts-test_spearman_max": 0.8954519734959775, + "eval_vitaminc-pairs_loss": 2.332582473754883, + "eval_vitaminc-pairs_runtime": 2.2432, + "eval_vitaminc-pairs_samples_per_second": 48.146, + "eval_vitaminc-pairs_steps_per_second": 0.892, + "step": 32 + }, + { + "epoch": 0.1, + "eval_negation-triplets_loss": 0.8681236505508423, + "eval_negation-triplets_runtime": 0.2927, + "eval_negation-triplets_samples_per_second": 218.641, + "eval_negation-triplets_steps_per_second": 3.416, + "step": 32 + }, + { + "epoch": 0.1, + "eval_scitail-pairs-pos_loss": 0.07759770005941391, + "eval_scitail-pairs-pos_runtime": 0.3708, + "eval_scitail-pairs-pos_samples_per_second": 145.613, + "eval_scitail-pairs-pos_steps_per_second": 2.697, + "step": 32 + }, + { + "epoch": 0.1, + "eval_xsum-pairs_loss": 0.09131219983100891, + "eval_xsum-pairs_runtime": 2.8486, + "eval_xsum-pairs_samples_per_second": 44.934, + "eval_xsum-pairs_steps_per_second": 0.702, + "step": 32 + }, + { + "epoch": 0.1, + "eval_sciq_pairs_loss": 0.01965576782822609, + "eval_sciq_pairs_runtime": 3.6062, + "eval_sciq_pairs_samples_per_second": 35.494, + "eval_sciq_pairs_steps_per_second": 0.555, + "step": 32 + }, + { + "epoch": 0.1, + "eval_qasc_pairs_loss": 0.10996829718351364, + "eval_qasc_pairs_runtime": 0.5975, + "eval_qasc_pairs_samples_per_second": 214.235, + "eval_qasc_pairs_steps_per_second": 3.347, + "step": 32 + }, + { + "epoch": 0.1, + "eval_openbookqa_pairs_loss": 0.6932356953620911, + "eval_openbookqa_pairs_runtime": 0.5729, + "eval_openbookqa_pairs_samples_per_second": 223.415, + "eval_openbookqa_pairs_steps_per_second": 3.491, + "step": 32 + }, + { + "epoch": 0.1, + "eval_msmarco_pairs_loss": 0.32686129212379456, + "eval_msmarco_pairs_runtime": 1.4637, + "eval_msmarco_pairs_samples_per_second": 87.448, + "eval_msmarco_pairs_steps_per_second": 1.366, + "step": 32 + }, + { + "epoch": 0.1, + "eval_nq_pairs_loss": 0.1978442668914795, + "eval_nq_pairs_runtime": 2.8588, + "eval_nq_pairs_samples_per_second": 44.774, + "eval_nq_pairs_steps_per_second": 0.7, + "step": 32 + }, + { + "epoch": 0.1, + "eval_trivia_pairs_loss": 0.7432661652565002, + "eval_trivia_pairs_runtime": 4.3895, + "eval_trivia_pairs_samples_per_second": 29.16, + "eval_trivia_pairs_steps_per_second": 0.456, + "step": 32 + }, + { + "epoch": 0.1, + "eval_gooaq_pairs_loss": 0.3761173486709595, + "eval_gooaq_pairs_runtime": 1.0043, + "eval_gooaq_pairs_samples_per_second": 127.452, + "eval_gooaq_pairs_steps_per_second": 1.991, + "step": 32 + }, + { + "epoch": 0.1, + "eval_paws-pos_loss": 0.02476382441818714, + "eval_paws-pos_runtime": 0.6858, + "eval_paws-pos_samples_per_second": 186.635, + "eval_paws-pos_steps_per_second": 2.916, + "step": 32 + }, + { + "epoch": 0.103125, + "grad_norm": 3.51029109954834, + "learning_rate": 1.0312500000000002e-05, + "loss": 0.7783, + "step": 33 + }, + { + "epoch": 0.10625, + "grad_norm": 3.376455783843994, + "learning_rate": 1.0625e-05, + "loss": 0.8746, + "step": 34 + }, + { + "epoch": 0.109375, + "grad_norm": 2.7385308742523193, + "learning_rate": 1.0937500000000002e-05, + "loss": 0.5085, + "step": 35 + }, + { + "epoch": 0.1125, + "grad_norm": 2.782606840133667, + "learning_rate": 1.125e-05, + "loss": 0.4842, + "step": 36 + }, + { + "epoch": 0.115625, + "grad_norm": 3.4377782344818115, + "learning_rate": 1.1562500000000002e-05, + "loss": 0.8097, + "step": 37 + }, + { + "epoch": 0.11875, + "grad_norm": 2.6202378273010254, + "learning_rate": 1.1875e-05, + "loss": 0.5325, + "step": 38 + }, + { + "epoch": 0.121875, + "grad_norm": 3.0869128704071045, + "learning_rate": 1.2187500000000001e-05, + "loss": 0.7221, + "step": 39 + }, + { + "epoch": 0.125, + "grad_norm": 3.131516456604004, + "learning_rate": 1.25e-05, + "loss": 0.708, + "step": 40 + }, + { + "epoch": 0.128125, + "grad_norm": 2.0318033695220947, + "learning_rate": 1.2812500000000001e-05, + "loss": 0.2789, + "step": 41 + }, + { + "epoch": 0.13125, + "grad_norm": 3.2574217319488525, + "learning_rate": 1.3125e-05, + "loss": 0.7986, + "step": 42 + }, + { + "epoch": 0.134375, + "grad_norm": 3.6287729740142822, + "learning_rate": 1.3437500000000001e-05, + "loss": 0.9653, + "step": 43 + }, + { + "epoch": 0.1375, + "grad_norm": 3.1281752586364746, + "learning_rate": 1.375e-05, + "loss": 0.7857, + "step": 44 + }, + { + "epoch": 0.140625, + "grad_norm": 2.201566219329834, + "learning_rate": 1.4062500000000001e-05, + "loss": 0.2726, + "step": 45 + }, + { + "epoch": 0.14375, + "grad_norm": 1.8727688789367676, + "learning_rate": 1.4375e-05, + "loss": 0.2458, + "step": 46 + }, + { + "epoch": 0.146875, + "grad_norm": 3.156454086303711, + "learning_rate": 1.4687500000000001e-05, + "loss": 0.6988, + "step": 47 + }, + { + "epoch": 0.15, + "grad_norm": 3.0224971771240234, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.6328, + "step": 48 + }, + { + "epoch": 0.153125, + "grad_norm": 3.4717319011688232, + "learning_rate": 1.5312500000000003e-05, + "loss": 0.795, + "step": 49 + }, + { + "epoch": 0.15625, + "grad_norm": 2.8961374759674072, + "learning_rate": 1.5625e-05, + "loss": 0.6163, + "step": 50 + }, + { + "epoch": 0.159375, + "grad_norm": 3.667778491973877, + "learning_rate": 1.59375e-05, + "loss": 0.8269, + "step": 51 + }, + { + "epoch": 0.1625, + "grad_norm": 2.350587844848633, + "learning_rate": 1.6250000000000002e-05, + "loss": 0.52, + "step": 52 + }, + { + "epoch": 0.165625, + "grad_norm": 3.312248468399048, + "learning_rate": 1.6562500000000003e-05, + "loss": 0.7523, + "step": 53 + }, + { + "epoch": 0.16875, + "grad_norm": 2.8101534843444824, + "learning_rate": 1.6875e-05, + "loss": 0.6979, + "step": 54 + }, + { + "epoch": 0.171875, + "grad_norm": 3.144334077835083, + "learning_rate": 1.71875e-05, + "loss": 0.7845, + "step": 55 + }, + { + "epoch": 0.175, + "grad_norm": 3.671412229537964, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.9325, + "step": 56 + }, + { + "epoch": 0.178125, + "grad_norm": 3.204644203186035, + "learning_rate": 1.7812500000000003e-05, + "loss": 0.8546, + "step": 57 + }, + { + "epoch": 0.18125, + "grad_norm": 2.9951093196868896, + "learning_rate": 1.8125e-05, + "loss": 0.6392, + "step": 58 + }, + { + "epoch": 0.184375, + "grad_norm": 3.036386013031006, + "learning_rate": 1.84375e-05, + "loss": 0.5827, + "step": 59 + }, + { + "epoch": 0.1875, + "grad_norm": 3.0899698734283447, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.5961, + "step": 60 + }, + { + "epoch": 0.190625, + "grad_norm": 2.3574728965759277, + "learning_rate": 1.9062500000000003e-05, + "loss": 0.3625, + "step": 61 + }, + { + "epoch": 0.19375, + "grad_norm": 2.4232304096221924, + "learning_rate": 1.9375e-05, + "loss": 0.2584, + "step": 62 + }, + { + "epoch": 0.196875, + "grad_norm": 1.9016233682632446, + "learning_rate": 1.96875e-05, + "loss": 0.4047, + "step": 63 + }, + { + "epoch": 0.2, + "grad_norm": 3.193114995956421, + "learning_rate": 2e-05, + "loss": 0.9429, + "step": 64 + }, + { + "epoch": 0.2, + "eval_VitaminC_cosine_accuracy": 0.560546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8192525506019592, + "eval_VitaminC_cosine_ap": 0.5485465805560719, + "eval_VitaminC_cosine_f1": 0.6675531914893617, + "eval_VitaminC_cosine_f1_threshold": 0.30620089173316956, + "eval_VitaminC_cosine_precision": 0.500998003992016, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 308.60137939453125, + "eval_VitaminC_dot_ap": 0.5375184580780159, + "eval_VitaminC_dot_f1": 0.6657824933687002, + "eval_VitaminC_dot_f1_threshold": 97.275634765625, + "eval_VitaminC_dot_precision": 0.4990059642147117, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.552734375, + "eval_VitaminC_euclidean_accuracy_threshold": 11.976862907409668, + "eval_VitaminC_euclidean_ap": 0.5494925067012235, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 23.21343994140625, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 313.34185791015625, + "eval_VitaminC_manhattan_ap": 0.5475158315491966, + "eval_VitaminC_manhattan_f1": 0.6666666666666666, + "eval_VitaminC_manhattan_f1_threshold": 495.06231689453125, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 313.34185791015625, + "eval_VitaminC_max_ap": 0.5494925067012235, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 495.06231689453125, + "eval_VitaminC_max_precision": 0.500998003992016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5494925067012235, + "eval_sts-test_pearson_cosine": 0.8681028367252808, + "eval_sts-test_pearson_dot": 0.8578643818026934, + "eval_sts-test_pearson_euclidean": 0.8913506886125709, + "eval_sts-test_pearson_manhattan": 0.8922209656727235, + "eval_sts-test_pearson_max": 0.8922209656727235, + "eval_sts-test_spearman_cosine": 0.8960442588011338, + "eval_sts-test_spearman_dot": 0.8606696844578128, + "eval_sts-test_spearman_euclidean": 0.8895474944286376, + "eval_sts-test_spearman_manhattan": 0.8895341585527426, + "eval_sts-test_spearman_max": 0.8960442588011338, + "eval_vitaminc-pairs_loss": 2.260099411010742, + "eval_vitaminc-pairs_runtime": 1.8392, + "eval_vitaminc-pairs_samples_per_second": 58.723, + "eval_vitaminc-pairs_steps_per_second": 1.087, + "step": 64 + }, + { + "epoch": 0.2, + "eval_negation-triplets_loss": 0.836820662021637, + "eval_negation-triplets_runtime": 0.294, + "eval_negation-triplets_samples_per_second": 217.7, + "eval_negation-triplets_steps_per_second": 3.402, + "step": 64 + }, + { + "epoch": 0.2, + "eval_scitail-pairs-pos_loss": 0.08362159878015518, + "eval_scitail-pairs-pos_runtime": 0.3686, + "eval_scitail-pairs-pos_samples_per_second": 146.509, + "eval_scitail-pairs-pos_steps_per_second": 2.713, + "step": 64 + }, + { + "epoch": 0.2, + "eval_xsum-pairs_loss": 0.08567425608634949, + "eval_xsum-pairs_runtime": 2.8489, + "eval_xsum-pairs_samples_per_second": 44.93, + "eval_xsum-pairs_steps_per_second": 0.702, + "step": 64 + }, + { + "epoch": 0.2, + "eval_sciq_pairs_loss": 0.019713517278432846, + "eval_sciq_pairs_runtime": 3.616, + "eval_sciq_pairs_samples_per_second": 35.399, + "eval_sciq_pairs_steps_per_second": 0.553, + "step": 64 + }, + { + "epoch": 0.2, + "eval_qasc_pairs_loss": 0.11403815448284149, + "eval_qasc_pairs_runtime": 0.6024, + "eval_qasc_pairs_samples_per_second": 212.48, + "eval_qasc_pairs_steps_per_second": 3.32, + "step": 64 + }, + { + "epoch": 0.2, + "eval_openbookqa_pairs_loss": 0.6793034076690674, + "eval_openbookqa_pairs_runtime": 0.5864, + "eval_openbookqa_pairs_samples_per_second": 218.266, + "eval_openbookqa_pairs_steps_per_second": 3.41, + "step": 64 + }, + { + "epoch": 0.2, + "eval_msmarco_pairs_loss": 0.34600257873535156, + "eval_msmarco_pairs_runtime": 1.4668, + "eval_msmarco_pairs_samples_per_second": 87.263, + "eval_msmarco_pairs_steps_per_second": 1.363, + "step": 64 + }, + { + "epoch": 0.2, + "eval_nq_pairs_loss": 0.22141708433628082, + "eval_nq_pairs_runtime": 2.8596, + "eval_nq_pairs_samples_per_second": 44.761, + "eval_nq_pairs_steps_per_second": 0.699, + "step": 64 + }, + { + "epoch": 0.2, + "eval_trivia_pairs_loss": 0.7303681969642639, + "eval_trivia_pairs_runtime": 4.3864, + "eval_trivia_pairs_samples_per_second": 29.181, + "eval_trivia_pairs_steps_per_second": 0.456, + "step": 64 + }, + { + "epoch": 0.2, + "eval_gooaq_pairs_loss": 0.38013964891433716, + "eval_gooaq_pairs_runtime": 1.0052, + "eval_gooaq_pairs_samples_per_second": 127.34, + "eval_gooaq_pairs_steps_per_second": 1.99, + "step": 64 + }, + { + "epoch": 0.2, + "eval_paws-pos_loss": 0.024541139602661133, + "eval_paws-pos_runtime": 0.6851, + "eval_paws-pos_samples_per_second": 186.844, + "eval_paws-pos_steps_per_second": 2.919, + "step": 64 + }, + { + "epoch": 0.203125, + "grad_norm": 3.5084540843963623, + "learning_rate": 2.0312500000000002e-05, + "loss": 0.7848, + "step": 65 + }, + { + "epoch": 0.20625, + "grad_norm": 3.749316453933716, + "learning_rate": 2.0625000000000003e-05, + "loss": 0.7589, + "step": 66 + }, + { + "epoch": 0.209375, + "grad_norm": 3.4131276607513428, + "learning_rate": 2.09375e-05, + "loss": 0.5905, + "step": 67 + }, + { + "epoch": 0.2125, + "grad_norm": 2.4543726444244385, + "learning_rate": 2.125e-05, + "loss": 0.4211, + "step": 68 + }, + { + "epoch": 0.215625, + "grad_norm": 2.6270904541015625, + "learning_rate": 2.1562500000000002e-05, + "loss": 0.5325, + "step": 69 + }, + { + "epoch": 0.21875, + "grad_norm": 2.2518444061279297, + "learning_rate": 2.1875000000000003e-05, + "loss": 0.3541, + "step": 70 + }, + { + "epoch": 0.221875, + "grad_norm": 3.88729190826416, + "learning_rate": 2.21875e-05, + "loss": 0.9396, + "step": 71 + }, + { + "epoch": 0.225, + "grad_norm": 3.2759203910827637, + "learning_rate": 2.25e-05, + "loss": 0.6997, + "step": 72 + }, + { + "epoch": 0.228125, + "grad_norm": 3.149787425994873, + "learning_rate": 2.2812500000000002e-05, + "loss": 0.6415, + "step": 73 + }, + { + "epoch": 0.23125, + "grad_norm": 4.01395845413208, + "learning_rate": 2.3125000000000003e-05, + "loss": 1.1966, + "step": 74 + }, + { + "epoch": 0.234375, + "grad_norm": 3.0432724952697754, + "learning_rate": 2.34375e-05, + "loss": 0.7142, + "step": 75 + }, + { + "epoch": 0.2375, + "grad_norm": 2.960078716278076, + "learning_rate": 2.375e-05, + "loss": 0.6048, + "step": 76 + }, + { + "epoch": 0.240625, + "grad_norm": 2.414846658706665, + "learning_rate": 2.4062500000000002e-05, + "loss": 0.4639, + "step": 77 + }, + { + "epoch": 0.24375, + "grad_norm": 4.241907119750977, + "learning_rate": 2.4375000000000003e-05, + "loss": 0.9391, + "step": 78 + }, + { + "epoch": 0.246875, + "grad_norm": 3.350724220275879, + "learning_rate": 2.46875e-05, + "loss": 0.6364, + "step": 79 + }, + { + "epoch": 0.25, + "grad_norm": 2.519324541091919, + "learning_rate": 2.5e-05, + "loss": 0.515, + "step": 80 + }, + { + "epoch": 0.253125, + "grad_norm": 3.655949592590332, + "learning_rate": 2.5312500000000002e-05, + "loss": 0.6505, + "step": 81 + }, + { + "epoch": 0.25625, + "grad_norm": 3.1521031856536865, + "learning_rate": 2.5625000000000003e-05, + "loss": 0.6149, + "step": 82 + }, + { + "epoch": 0.259375, + "grad_norm": 2.637176036834717, + "learning_rate": 2.5937500000000004e-05, + "loss": 0.4471, + "step": 83 + }, + { + "epoch": 0.2625, + "grad_norm": 4.223080158233643, + "learning_rate": 2.625e-05, + "loss": 1.4199, + "step": 84 + }, + { + "epoch": 0.265625, + "grad_norm": 3.141789436340332, + "learning_rate": 2.6562500000000002e-05, + "loss": 0.8484, + "step": 85 + }, + { + "epoch": 0.26875, + "grad_norm": 3.2342255115509033, + "learning_rate": 2.6875000000000003e-05, + "loss": 0.6412, + "step": 86 + }, + { + "epoch": 0.271875, + "grad_norm": 3.445375442504883, + "learning_rate": 2.7187500000000004e-05, + "loss": 0.65, + "step": 87 + }, + { + "epoch": 0.275, + "grad_norm": 3.395848035812378, + "learning_rate": 2.75e-05, + "loss": 0.7453, + "step": 88 + }, + { + "epoch": 0.278125, + "grad_norm": 3.752084493637085, + "learning_rate": 2.7812500000000002e-05, + "loss": 0.9506, + "step": 89 + }, + { + "epoch": 0.28125, + "grad_norm": 3.2424893379211426, + "learning_rate": 2.8125000000000003e-05, + "loss": 0.6083, + "step": 90 + }, + { + "epoch": 0.284375, + "grad_norm": 2.8851892948150635, + "learning_rate": 2.8437500000000003e-05, + "loss": 0.7102, + "step": 91 + }, + { + "epoch": 0.2875, + "grad_norm": 2.385157823562622, + "learning_rate": 2.875e-05, + "loss": 0.4037, + "step": 92 + }, + { + "epoch": 0.290625, + "grad_norm": 3.5539441108703613, + "learning_rate": 2.90625e-05, + "loss": 0.769, + "step": 93 + }, + { + "epoch": 0.29375, + "grad_norm": 3.686418056488037, + "learning_rate": 2.9375000000000003e-05, + "loss": 0.8765, + "step": 94 + }, + { + "epoch": 0.296875, + "grad_norm": 3.9195055961608887, + "learning_rate": 2.9687500000000003e-05, + "loss": 1.2583, + "step": 95 + }, + { + "epoch": 0.3, + "grad_norm": 3.5373759269714355, + "learning_rate": 3.0000000000000004e-05, + "loss": 0.8885, + "step": 96 + }, + { + "epoch": 0.3, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.8407348990440369, + "eval_VitaminC_cosine_ap": 0.5524635737287826, + "eval_VitaminC_cosine_f1": 0.6666666666666666, + "eval_VitaminC_cosine_f1_threshold": 0.2901695668697357, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 331.7409973144531, + "eval_VitaminC_dot_ap": 0.5393192469559877, + "eval_VitaminC_dot_f1": 0.6657824933687002, + "eval_VitaminC_dot_f1_threshold": 104.93923950195312, + "eval_VitaminC_dot_precision": 0.4990059642147117, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 10.979323387145996, + "eval_VitaminC_euclidean_ap": 0.5510789245842218, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 23.105466842651367, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.55078125, + "eval_VitaminC_manhattan_accuracy_threshold": 228.8612060546875, + "eval_VitaminC_manhattan_ap": 0.550140326019901, + "eval_VitaminC_manhattan_f1": 0.6666666666666667, + "eval_VitaminC_manhattan_f1_threshold": 479.256103515625, + "eval_VitaminC_manhattan_precision": 0.501002004008016, + "eval_VitaminC_manhattan_recall": 0.9960159362549801, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 331.7409973144531, + "eval_VitaminC_max_ap": 0.5524635737287826, + "eval_VitaminC_max_f1": 0.6666666666666667, + "eval_VitaminC_max_f1_threshold": 479.256103515625, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5524635737287826, + "eval_sts-test_pearson_cosine": 0.8707252459918289, + "eval_sts-test_pearson_dot": 0.8616721319399807, + "eval_sts-test_pearson_euclidean": 0.8926205493906139, + "eval_sts-test_pearson_manhattan": 0.8931067612799872, + "eval_sts-test_pearson_max": 0.8931067612799872, + "eval_sts-test_spearman_cosine": 0.8969095691913977, + "eval_sts-test_spearman_dot": 0.8614390033923923, + "eval_sts-test_spearman_euclidean": 0.8906887410966409, + "eval_sts-test_spearman_manhattan": 0.8902939007173846, + "eval_sts-test_spearman_max": 0.8969095691913977, + "eval_vitaminc-pairs_loss": 2.259434938430786, + "eval_vitaminc-pairs_runtime": 1.8587, + "eval_vitaminc-pairs_samples_per_second": 58.104, + "eval_vitaminc-pairs_steps_per_second": 1.076, + "step": 96 + }, + { + "epoch": 0.3, + "eval_negation-triplets_loss": 0.8346852660179138, + "eval_negation-triplets_runtime": 0.2932, + "eval_negation-triplets_samples_per_second": 218.315, + "eval_negation-triplets_steps_per_second": 3.411, + "step": 96 + }, + { + "epoch": 0.3, + "eval_scitail-pairs-pos_loss": 0.07568605989217758, + "eval_scitail-pairs-pos_runtime": 0.3763, + "eval_scitail-pairs-pos_samples_per_second": 143.494, + "eval_scitail-pairs-pos_steps_per_second": 2.657, + "step": 96 + }, + { + "epoch": 0.3, + "eval_xsum-pairs_loss": 0.08208194375038147, + "eval_xsum-pairs_runtime": 2.8486, + "eval_xsum-pairs_samples_per_second": 44.934, + "eval_xsum-pairs_steps_per_second": 0.702, + "step": 96 + }, + { + "epoch": 0.3, + "eval_sciq_pairs_loss": 0.020024314522743225, + "eval_sciq_pairs_runtime": 3.6173, + "eval_sciq_pairs_samples_per_second": 35.386, + "eval_sciq_pairs_steps_per_second": 0.553, + "step": 96 + }, + { + "epoch": 0.3, + "eval_qasc_pairs_loss": 0.10592304170131683, + "eval_qasc_pairs_runtime": 0.5997, + "eval_qasc_pairs_samples_per_second": 213.431, + "eval_qasc_pairs_steps_per_second": 3.335, + "step": 96 + }, + { + "epoch": 0.3, + "eval_openbookqa_pairs_loss": 0.6809090971946716, + "eval_openbookqa_pairs_runtime": 0.5752, + "eval_openbookqa_pairs_samples_per_second": 222.54, + "eval_openbookqa_pairs_steps_per_second": 3.477, + "step": 96 + }, + { + "epoch": 0.3, + "eval_msmarco_pairs_loss": 0.3400232195854187, + "eval_msmarco_pairs_runtime": 1.4679, + "eval_msmarco_pairs_samples_per_second": 87.202, + "eval_msmarco_pairs_steps_per_second": 1.363, + "step": 96 + }, + { + "epoch": 0.3, + "eval_nq_pairs_loss": 0.2074178159236908, + "eval_nq_pairs_runtime": 2.8593, + "eval_nq_pairs_samples_per_second": 44.766, + "eval_nq_pairs_steps_per_second": 0.699, + "step": 96 + }, + { + "epoch": 0.3, + "eval_trivia_pairs_loss": 0.7431399822235107, + "eval_trivia_pairs_runtime": 4.4162, + "eval_trivia_pairs_samples_per_second": 28.984, + "eval_trivia_pairs_steps_per_second": 0.453, + "step": 96 + }, + { + "epoch": 0.3, + "eval_gooaq_pairs_loss": 0.3708875775337219, + "eval_gooaq_pairs_runtime": 1.0094, + "eval_gooaq_pairs_samples_per_second": 126.81, + "eval_gooaq_pairs_steps_per_second": 1.981, + "step": 96 + }, + { + "epoch": 0.3, + "eval_paws-pos_loss": 0.024763749912381172, + "eval_paws-pos_runtime": 0.6874, + "eval_paws-pos_samples_per_second": 186.212, + "eval_paws-pos_steps_per_second": 2.91, + "step": 96 + }, + { + "epoch": 0.303125, + "grad_norm": 3.2354822158813477, + "learning_rate": 3.03125e-05, + "loss": 0.6398, + "step": 97 + }, + { + "epoch": 0.30625, + "grad_norm": 3.6665022373199463, + "learning_rate": 3.0625000000000006e-05, + "loss": 0.8263, + "step": 98 + }, + { + "epoch": 0.309375, + "grad_norm": 3.026954412460327, + "learning_rate": 3.09375e-05, + "loss": 0.8716, + "step": 99 + }, + { + "epoch": 0.3125, + "grad_norm": 2.445453643798828, + "learning_rate": 3.125e-05, + "loss": 0.5523, + "step": 100 + }, + { + "epoch": 0.315625, + "grad_norm": 3.4408035278320312, + "learning_rate": 3.15625e-05, + "loss": 0.5811, + "step": 101 + }, + { + "epoch": 0.31875, + "grad_norm": 2.8406240940093994, + "learning_rate": 3.1875e-05, + "loss": 0.7602, + "step": 102 + }, + { + "epoch": 0.321875, + "grad_norm": 2.5201492309570312, + "learning_rate": 3.21875e-05, + "loss": 0.5337, + "step": 103 + }, + { + "epoch": 0.325, + "grad_norm": 3.323239326477051, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.8182, + "step": 104 + }, + { + "epoch": 0.328125, + "grad_norm": 3.2463977336883545, + "learning_rate": 3.2812500000000005e-05, + "loss": 0.6641, + "step": 105 + }, + { + "epoch": 0.33125, + "grad_norm": 3.4495010375976562, + "learning_rate": 3.3125000000000006e-05, + "loss": 1.0088, + "step": 106 + }, + { + "epoch": 0.334375, + "grad_norm": 2.7572243213653564, + "learning_rate": 3.34375e-05, + "loss": 0.7556, + "step": 107 + }, + { + "epoch": 0.3375, + "grad_norm": 3.494549512863159, + "learning_rate": 3.375e-05, + "loss": 0.713, + "step": 108 + }, + { + "epoch": 0.340625, + "grad_norm": 3.4666013717651367, + "learning_rate": 3.40625e-05, + "loss": 0.8385, + "step": 109 + }, + { + "epoch": 0.34375, + "grad_norm": 3.05104660987854, + "learning_rate": 3.4375e-05, + "loss": 0.5181, + "step": 110 + }, + { + "epoch": 0.346875, + "grad_norm": 3.8259003162384033, + "learning_rate": 3.46875e-05, + "loss": 1.0939, + "step": 111 + }, + { + "epoch": 0.35, + "grad_norm": 3.287792205810547, + "learning_rate": 3.5000000000000004e-05, + "loss": 0.5826, + "step": 112 + }, + { + "epoch": 0.353125, + "grad_norm": 3.9174458980560303, + "learning_rate": 3.5312500000000005e-05, + "loss": 0.7121, + "step": 113 + }, + { + "epoch": 0.35625, + "grad_norm": 3.424893379211426, + "learning_rate": 3.5625000000000005e-05, + "loss": 0.9371, + "step": 114 + }, + { + "epoch": 0.359375, + "grad_norm": 3.5157482624053955, + "learning_rate": 3.5937500000000006e-05, + "loss": 0.7739, + "step": 115 + }, + { + "epoch": 0.3625, + "grad_norm": 4.468640327453613, + "learning_rate": 3.625e-05, + "loss": 0.9612, + "step": 116 + }, + { + "epoch": 0.365625, + "grad_norm": 3.4379608631134033, + "learning_rate": 3.65625e-05, + "loss": 0.7213, + "step": 117 + }, + { + "epoch": 0.36875, + "grad_norm": 2.9453623294830322, + "learning_rate": 3.6875e-05, + "loss": 0.621, + "step": 118 + }, + { + "epoch": 0.371875, + "grad_norm": 2.4365315437316895, + "learning_rate": 3.71875e-05, + "loss": 0.5503, + "step": 119 + }, + { + "epoch": 0.375, + "grad_norm": 3.446967124938965, + "learning_rate": 3.7500000000000003e-05, + "loss": 0.8439, + "step": 120 + }, + { + "epoch": 0.378125, + "grad_norm": 3.8797788619995117, + "learning_rate": 3.7812500000000004e-05, + "loss": 0.7813, + "step": 121 + }, + { + "epoch": 0.38125, + "grad_norm": 3.0103230476379395, + "learning_rate": 3.8125000000000005e-05, + "loss": 0.5637, + "step": 122 + }, + { + "epoch": 0.384375, + "grad_norm": 3.9547793865203857, + "learning_rate": 3.8437500000000006e-05, + "loss": 0.9052, + "step": 123 + }, + { + "epoch": 0.3875, + "grad_norm": 2.953261375427246, + "learning_rate": 3.875e-05, + "loss": 0.64, + "step": 124 + }, + { + "epoch": 0.390625, + "grad_norm": 2.914365768432617, + "learning_rate": 3.90625e-05, + "loss": 0.6529, + "step": 125 + }, + { + "epoch": 0.39375, + "grad_norm": 3.346844434738159, + "learning_rate": 3.9375e-05, + "loss": 0.6894, + "step": 126 + }, + { + "epoch": 0.396875, + "grad_norm": 3.946427583694458, + "learning_rate": 3.96875e-05, + "loss": 0.8604, + "step": 127 + }, + { + "epoch": 0.4, + "grad_norm": 3.3265583515167236, + "learning_rate": 4e-05, + "loss": 0.8503, + "step": 128 + }, + { + "epoch": 0.4, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.835027813911438, + "eval_VitaminC_cosine_ap": 0.5482054260732142, + "eval_VitaminC_cosine_f1": 0.6666666666666666, + "eval_VitaminC_cosine_f1_threshold": 0.28428012132644653, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.548828125, + "eval_VitaminC_dot_accuracy_threshold": 321.1236572265625, + "eval_VitaminC_dot_ap": 0.5350248143918641, + "eval_VitaminC_dot_f1": 0.6649006622516557, + "eval_VitaminC_dot_f1_threshold": 94.1016616821289, + "eval_VitaminC_dot_precision": 0.498015873015873, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 13.260427474975586, + "eval_VitaminC_euclidean_ap": 0.551773706587656, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 23.911056518554688, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.55859375, + "eval_VitaminC_manhattan_accuracy_threshold": 273.4624328613281, + "eval_VitaminC_manhattan_ap": 0.5494410762635437, + "eval_VitaminC_manhattan_f1": 0.6666666666666667, + "eval_VitaminC_manhattan_f1_threshold": 472.7373046875, + "eval_VitaminC_manhattan_precision": 0.5020161290322581, + "eval_VitaminC_manhattan_recall": 0.9920318725099602, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 321.1236572265625, + "eval_VitaminC_max_ap": 0.551773706587656, + "eval_VitaminC_max_f1": 0.6666666666666667, + "eval_VitaminC_max_f1_threshold": 472.7373046875, + "eval_VitaminC_max_precision": 0.5020161290322581, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.551773706587656, + "eval_sts-test_pearson_cosine": 0.8672675483925697, + "eval_sts-test_pearson_dot": 0.8586110849200466, + "eval_sts-test_pearson_euclidean": 0.8915515585715386, + "eval_sts-test_pearson_manhattan": 0.8913674606593633, + "eval_sts-test_pearson_max": 0.8915515585715386, + "eval_sts-test_spearman_cosine": 0.8969123885208655, + "eval_sts-test_spearman_dot": 0.8619306407500383, + "eval_sts-test_spearman_euclidean": 0.8903670690297594, + "eval_sts-test_spearman_manhattan": 0.890351227083227, + "eval_sts-test_spearman_max": 0.8969123885208655, + "eval_vitaminc-pairs_loss": 2.0338199138641357, + "eval_vitaminc-pairs_runtime": 1.8309, + "eval_vitaminc-pairs_samples_per_second": 58.988, + "eval_vitaminc-pairs_steps_per_second": 1.092, + "step": 128 + }, + { + "epoch": 0.4, + "eval_negation-triplets_loss": 0.7916581630706787, + "eval_negation-triplets_runtime": 0.2912, + "eval_negation-triplets_samples_per_second": 219.766, + "eval_negation-triplets_steps_per_second": 3.434, + "step": 128 + }, + { + "epoch": 0.4, + "eval_scitail-pairs-pos_loss": 0.07755717635154724, + "eval_scitail-pairs-pos_runtime": 0.3716, + "eval_scitail-pairs-pos_samples_per_second": 145.312, + "eval_scitail-pairs-pos_steps_per_second": 2.691, + "step": 128 + }, + { + "epoch": 0.4, + "eval_xsum-pairs_loss": 0.08196285367012024, + "eval_xsum-pairs_runtime": 2.852, + "eval_xsum-pairs_samples_per_second": 44.881, + "eval_xsum-pairs_steps_per_second": 0.701, + "step": 128 + }, + { + "epoch": 0.4, + "eval_sciq_pairs_loss": 0.020960956811904907, + "eval_sciq_pairs_runtime": 3.5913, + "eval_sciq_pairs_samples_per_second": 35.642, + "eval_sciq_pairs_steps_per_second": 0.557, + "step": 128 + }, + { + "epoch": 0.4, + "eval_qasc_pairs_loss": 0.11308694630861282, + "eval_qasc_pairs_runtime": 0.595, + "eval_qasc_pairs_samples_per_second": 215.137, + "eval_qasc_pairs_steps_per_second": 3.362, + "step": 128 + }, + { + "epoch": 0.4, + "eval_openbookqa_pairs_loss": 0.7888042330741882, + "eval_openbookqa_pairs_runtime": 0.5711, + "eval_openbookqa_pairs_samples_per_second": 224.114, + "eval_openbookqa_pairs_steps_per_second": 3.502, + "step": 128 + }, + { + "epoch": 0.4, + "eval_msmarco_pairs_loss": 0.3428971469402313, + "eval_msmarco_pairs_runtime": 1.465, + "eval_msmarco_pairs_samples_per_second": 87.373, + "eval_msmarco_pairs_steps_per_second": 1.365, + "step": 128 + }, + { + "epoch": 0.4, + "eval_nq_pairs_loss": 0.20846250653266907, + "eval_nq_pairs_runtime": 2.8581, + "eval_nq_pairs_samples_per_second": 44.786, + "eval_nq_pairs_steps_per_second": 0.7, + "step": 128 + }, + { + "epoch": 0.4, + "eval_trivia_pairs_loss": 0.7110738754272461, + "eval_trivia_pairs_runtime": 4.3917, + "eval_trivia_pairs_samples_per_second": 29.146, + "eval_trivia_pairs_steps_per_second": 0.455, + "step": 128 + }, + { + "epoch": 0.4, + "eval_gooaq_pairs_loss": 0.3744402229785919, + "eval_gooaq_pairs_runtime": 1.0043, + "eval_gooaq_pairs_samples_per_second": 127.448, + "eval_gooaq_pairs_steps_per_second": 1.991, + "step": 128 + }, + { + "epoch": 0.4, + "eval_paws-pos_loss": 0.024828137829899788, + "eval_paws-pos_runtime": 0.6859, + "eval_paws-pos_samples_per_second": 186.611, + "eval_paws-pos_steps_per_second": 2.916, + "step": 128 + }, + { + "epoch": 0.403125, + "grad_norm": 3.7963619232177734, + "learning_rate": 3.999971762923902e-05, + "loss": 0.8171, + "step": 129 + }, + { + "epoch": 0.40625, + "grad_norm": 3.987645387649536, + "learning_rate": 3.999887052758717e-05, + "loss": 1.0401, + "step": 130 + }, + { + "epoch": 0.409375, + "grad_norm": 2.653578758239746, + "learning_rate": 3.999745872693735e-05, + "loss": 0.4243, + "step": 131 + }, + { + "epoch": 0.4125, + "grad_norm": 2.3737175464630127, + "learning_rate": 3.9995482280443065e-05, + "loss": 0.3778, + "step": 132 + }, + { + "epoch": 0.415625, + "grad_norm": 3.334118127822876, + "learning_rate": 3.99929412625164e-05, + "loss": 0.7651, + "step": 133 + }, + { + "epoch": 0.41875, + "grad_norm": 3.5098752975463867, + "learning_rate": 3.998983576882524e-05, + "loss": 0.6003, + "step": 134 + }, + { + "epoch": 0.421875, + "grad_norm": 3.023698091506958, + "learning_rate": 3.9986165916289686e-05, + "loss": 0.6023, + "step": 135 + }, + { + "epoch": 0.425, + "grad_norm": 3.293668746948242, + "learning_rate": 3.998193184307759e-05, + "loss": 0.6079, + "step": 136 + }, + { + "epoch": 0.428125, + "grad_norm": 3.326125144958496, + "learning_rate": 3.997713370859942e-05, + "loss": 0.6206, + "step": 137 + }, + { + "epoch": 0.43125, + "grad_norm": 3.322040557861328, + "learning_rate": 3.997177169350224e-05, + "loss": 0.4694, + "step": 138 + }, + { + "epoch": 0.434375, + "grad_norm": 3.1219382286071777, + "learning_rate": 3.996584599966288e-05, + "loss": 0.7528, + "step": 139 + }, + { + "epoch": 0.4375, + "grad_norm": 3.7076480388641357, + "learning_rate": 3.9959356850180354e-05, + "loss": 0.8395, + "step": 140 + }, + { + "epoch": 0.440625, + "grad_norm": 3.1098551750183105, + "learning_rate": 3.995230448936749e-05, + "loss": 0.6689, + "step": 141 + }, + { + "epoch": 0.44375, + "grad_norm": 3.31339168548584, + "learning_rate": 3.9944689182741674e-05, + "loss": 0.6547, + "step": 142 + }, + { + "epoch": 0.446875, + "grad_norm": 4.2841386795043945, + "learning_rate": 3.99365112170149e-05, + "loss": 0.9242, + "step": 143 + }, + { + "epoch": 0.45, + "grad_norm": 4.0628132820129395, + "learning_rate": 3.992777090008296e-05, + "loss": 0.9496, + "step": 144 + }, + { + "epoch": 0.453125, + "grad_norm": 3.484614849090576, + "learning_rate": 3.9918468561013834e-05, + "loss": 0.6506, + "step": 145 + }, + { + "epoch": 0.45625, + "grad_norm": 3.4139559268951416, + "learning_rate": 3.990860455003534e-05, + "loss": 0.786, + "step": 146 + }, + { + "epoch": 0.459375, + "grad_norm": 3.4322853088378906, + "learning_rate": 3.9898179238521916e-05, + "loss": 0.7414, + "step": 147 + }, + { + "epoch": 0.4625, + "grad_norm": 2.660554885864258, + "learning_rate": 3.9887193018980654e-05, + "loss": 0.3978, + "step": 148 + }, + { + "epoch": 0.465625, + "grad_norm": 2.6429054737091064, + "learning_rate": 3.9875646305036494e-05, + "loss": 0.5635, + "step": 149 + }, + { + "epoch": 0.46875, + "grad_norm": 4.292131423950195, + "learning_rate": 3.98635395314167e-05, + "loss": 0.9466, + "step": 150 + }, + { + "epoch": 0.471875, + "grad_norm": 3.1115028858184814, + "learning_rate": 3.9850873153934456e-05, + "loss": 0.5251, + "step": 151 + }, + { + "epoch": 0.475, + "grad_norm": 3.307051181793213, + "learning_rate": 3.983764764947172e-05, + "loss": 0.6636, + "step": 152 + }, + { + "epoch": 0.478125, + "grad_norm": 3.807854652404785, + "learning_rate": 3.9823863515961245e-05, + "loss": 0.7834, + "step": 153 + }, + { + "epoch": 0.48125, + "grad_norm": 2.9957728385925293, + "learning_rate": 3.980952127236788e-05, + "loss": 0.6177, + "step": 154 + }, + { + "epoch": 0.484375, + "grad_norm": 3.3072471618652344, + "learning_rate": 3.979462145866898e-05, + "loss": 0.4558, + "step": 155 + }, + { + "epoch": 0.4875, + "grad_norm": 3.0199949741363525, + "learning_rate": 3.977916463583412e-05, + "loss": 0.5228, + "step": 156 + }, + { + "epoch": 0.490625, + "grad_norm": 2.8596651554107666, + "learning_rate": 3.9763151385803936e-05, + "loss": 0.5543, + "step": 157 + }, + { + "epoch": 0.49375, + "grad_norm": 3.0589263439178467, + "learning_rate": 3.974658231146825e-05, + "loss": 0.7127, + "step": 158 + }, + { + "epoch": 0.496875, + "grad_norm": 2.489602565765381, + "learning_rate": 3.9729458036643335e-05, + "loss": 0.4227, + "step": 159 + }, + { + "epoch": 0.5, + "grad_norm": 3.3471999168395996, + "learning_rate": 3.971177920604846e-05, + "loss": 0.5914, + "step": 160 + }, + { + "epoch": 0.5, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.8433390855789185, + "eval_VitaminC_cosine_ap": 0.5529005025024077, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.3040446639060974, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.55859375, + "eval_VitaminC_dot_accuracy_threshold": 309.7912902832031, + "eval_VitaminC_dot_ap": 0.5373200658982779, + "eval_VitaminC_dot_f1": 0.6666666666666666, + "eval_VitaminC_dot_f1_threshold": 122.78400421142578, + "eval_VitaminC_dot_precision": 0.5, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 11.011507034301758, + "eval_VitaminC_euclidean_ap": 0.5542686405562732, + "eval_VitaminC_euclidean_f1": 0.6675531914893617, + "eval_VitaminC_euclidean_f1_threshold": 22.90133285522461, + "eval_VitaminC_euclidean_precision": 0.500998003992016, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.55859375, + "eval_VitaminC_manhattan_accuracy_threshold": 293.54693603515625, + "eval_VitaminC_manhattan_ap": 0.5529507613553954, + "eval_VitaminC_manhattan_f1": 0.6666666666666667, + "eval_VitaminC_manhattan_f1_threshold": 479.09588623046875, + "eval_VitaminC_manhattan_precision": 0.501002004008016, + "eval_VitaminC_manhattan_recall": 0.9960159362549801, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 309.7912902832031, + "eval_VitaminC_max_ap": 0.5542686405562732, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 479.09588623046875, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5542686405562732, + "eval_sts-test_pearson_cosine": 0.8717931331186477, + "eval_sts-test_pearson_dot": 0.8628985772297639, + "eval_sts-test_pearson_euclidean": 0.8935960577585327, + "eval_sts-test_pearson_manhattan": 0.8926162242871916, + "eval_sts-test_pearson_max": 0.8935960577585327, + "eval_sts-test_spearman_cosine": 0.8989036406477372, + "eval_sts-test_spearman_dot": 0.8620115510306339, + "eval_sts-test_spearman_euclidean": 0.8911198747488857, + "eval_sts-test_spearman_manhattan": 0.8899440801070879, + "eval_sts-test_spearman_max": 0.8989036406477372, + "eval_vitaminc-pairs_loss": 2.0564281940460205, + "eval_vitaminc-pairs_runtime": 1.8511, + "eval_vitaminc-pairs_samples_per_second": 58.343, + "eval_vitaminc-pairs_steps_per_second": 1.08, + "step": 160 + }, + { + "epoch": 0.5, + "eval_negation-triplets_loss": 0.7865684032440186, + "eval_negation-triplets_runtime": 0.2987, + "eval_negation-triplets_samples_per_second": 214.291, + "eval_negation-triplets_steps_per_second": 3.348, + "step": 160 + }, + { + "epoch": 0.5, + "eval_scitail-pairs-pos_loss": 0.09969007223844528, + "eval_scitail-pairs-pos_runtime": 0.384, + "eval_scitail-pairs-pos_samples_per_second": 140.615, + "eval_scitail-pairs-pos_steps_per_second": 2.604, + "step": 160 + }, + { + "epoch": 0.5, + "eval_xsum-pairs_loss": 0.08461853861808777, + "eval_xsum-pairs_runtime": 2.8533, + "eval_xsum-pairs_samples_per_second": 44.86, + "eval_xsum-pairs_steps_per_second": 0.701, + "step": 160 + }, + { + "epoch": 0.5, + "eval_sciq_pairs_loss": 0.020078735426068306, + "eval_sciq_pairs_runtime": 3.6458, + "eval_sciq_pairs_samples_per_second": 35.109, + "eval_sciq_pairs_steps_per_second": 0.549, + "step": 160 + }, + { + "epoch": 0.5, + "eval_qasc_pairs_loss": 0.12362705171108246, + "eval_qasc_pairs_runtime": 0.6028, + "eval_qasc_pairs_samples_per_second": 212.356, + "eval_qasc_pairs_steps_per_second": 3.318, + "step": 160 + }, + { + "epoch": 0.5, + "eval_openbookqa_pairs_loss": 0.6668081283569336, + "eval_openbookqa_pairs_runtime": 0.5784, + "eval_openbookqa_pairs_samples_per_second": 221.308, + "eval_openbookqa_pairs_steps_per_second": 3.458, + "step": 160 + }, + { + "epoch": 0.5, + "eval_msmarco_pairs_loss": 0.32913729548454285, + "eval_msmarco_pairs_runtime": 1.4669, + "eval_msmarco_pairs_samples_per_second": 87.26, + "eval_msmarco_pairs_steps_per_second": 1.363, + "step": 160 + }, + { + "epoch": 0.5, + "eval_nq_pairs_loss": 0.2085198312997818, + "eval_nq_pairs_runtime": 2.8644, + "eval_nq_pairs_samples_per_second": 44.687, + "eval_nq_pairs_steps_per_second": 0.698, + "step": 160 + }, + { + "epoch": 0.5, + "eval_trivia_pairs_loss": 0.7138605117797852, + "eval_trivia_pairs_runtime": 4.3915, + "eval_trivia_pairs_samples_per_second": 29.147, + "eval_trivia_pairs_steps_per_second": 0.455, + "step": 160 + }, + { + "epoch": 0.5, + "eval_gooaq_pairs_loss": 0.3919322192668915, + "eval_gooaq_pairs_runtime": 1.004, + "eval_gooaq_pairs_samples_per_second": 127.484, + "eval_gooaq_pairs_steps_per_second": 1.992, + "step": 160 + }, + { + "epoch": 0.5, + "eval_paws-pos_loss": 0.025703923776745796, + "eval_paws-pos_runtime": 0.6869, + "eval_paws-pos_samples_per_second": 186.332, + "eval_paws-pos_steps_per_second": 2.911, + "step": 160 + }, + { + "epoch": 0.503125, + "grad_norm": 2.7484354972839355, + "learning_rate": 3.9693546485281616e-05, + "loss": 0.3874, + "step": 161 + }, + { + "epoch": 0.50625, + "grad_norm": 3.9011173248291016, + "learning_rate": 3.967476056079441e-05, + "loss": 0.8134, + "step": 162 + }, + { + "epoch": 0.509375, + "grad_norm": 3.723893642425537, + "learning_rate": 3.9655422139866315e-05, + "loss": 0.5596, + "step": 163 + }, + { + "epoch": 0.5125, + "grad_norm": 1.8328720331192017, + "learning_rate": 3.963553195057793e-05, + "loss": 0.2877, + "step": 164 + }, + { + "epoch": 0.515625, + "grad_norm": 2.9615490436553955, + "learning_rate": 3.9615090741783634e-05, + "loss": 0.5218, + "step": 165 + }, + { + "epoch": 0.51875, + "grad_norm": 3.041154146194458, + "learning_rate": 3.959409928308341e-05, + "loss": 0.5282, + "step": 166 + }, + { + "epoch": 0.521875, + "grad_norm": 3.439157247543335, + "learning_rate": 3.957255836479377e-05, + "loss": 0.7528, + "step": 167 + }, + { + "epoch": 0.525, + "grad_norm": 3.576984405517578, + "learning_rate": 3.955046879791816e-05, + "loss": 0.7174, + "step": 168 + }, + { + "epoch": 0.528125, + "grad_norm": 3.1042630672454834, + "learning_rate": 3.952783141411626e-05, + "loss": 0.6902, + "step": 169 + }, + { + "epoch": 0.53125, + "grad_norm": 3.0211422443389893, + "learning_rate": 3.9504647065672785e-05, + "loss": 0.7486, + "step": 170 + }, + { + "epoch": 0.534375, + "grad_norm": 3.5162508487701416, + "learning_rate": 3.9480916625465344e-05, + "loss": 0.6333, + "step": 171 + }, + { + "epoch": 0.5375, + "grad_norm": 3.9070920944213867, + "learning_rate": 3.9456640986931606e-05, + "loss": 1.2932, + "step": 172 + }, + { + "epoch": 0.540625, + "grad_norm": 3.548743724822998, + "learning_rate": 3.943182106403563e-05, + "loss": 0.6259, + "step": 173 + }, + { + "epoch": 0.54375, + "grad_norm": 3.64949893951416, + "learning_rate": 3.940645779123349e-05, + "loss": 0.8357, + "step": 174 + }, + { + "epoch": 0.546875, + "grad_norm": 2.4284133911132812, + "learning_rate": 3.938055212343807e-05, + "loss": 0.3604, + "step": 175 + }, + { + "epoch": 0.55, + "grad_norm": 2.9141008853912354, + "learning_rate": 3.9354105035983135e-05, + "loss": 0.6598, + "step": 176 + }, + { + "epoch": 0.553125, + "grad_norm": 2.0430235862731934, + "learning_rate": 3.932711752458657e-05, + "loss": 0.3169, + "step": 177 + }, + { + "epoch": 0.55625, + "grad_norm": 3.522728204727173, + "learning_rate": 3.929959060531291e-05, + "loss": 0.8629, + "step": 178 + }, + { + "epoch": 0.559375, + "grad_norm": 2.419400453567505, + "learning_rate": 3.927152531453513e-05, + "loss": 0.3648, + "step": 179 + }, + { + "epoch": 0.5625, + "grad_norm": 2.826747417449951, + "learning_rate": 3.924292270889555e-05, + "loss": 0.5103, + "step": 180 + }, + { + "epoch": 0.565625, + "grad_norm": 3.2149524688720703, + "learning_rate": 3.921378386526612e-05, + "loss": 0.6255, + "step": 181 + }, + { + "epoch": 0.56875, + "grad_norm": 2.2112457752227783, + "learning_rate": 3.918410988070782e-05, + "loss": 0.4382, + "step": 182 + }, + { + "epoch": 0.571875, + "grad_norm": 2.301940441131592, + "learning_rate": 3.915390187242941e-05, + "loss": 0.4647, + "step": 183 + }, + { + "epoch": 0.575, + "grad_norm": 2.272001266479492, + "learning_rate": 3.912316097774532e-05, + "loss": 0.4218, + "step": 184 + }, + { + "epoch": 0.578125, + "grad_norm": 3.77436900138855, + "learning_rate": 3.909188835403285e-05, + "loss": 0.8244, + "step": 185 + }, + { + "epoch": 0.58125, + "grad_norm": 3.236813545227051, + "learning_rate": 3.906008517868863e-05, + "loss": 0.6579, + "step": 186 + }, + { + "epoch": 0.584375, + "grad_norm": 3.1845405101776123, + "learning_rate": 3.9027752649084215e-05, + "loss": 0.8384, + "step": 187 + }, + { + "epoch": 0.5875, + "grad_norm": 2.709747791290283, + "learning_rate": 3.899489198252108e-05, + "loss": 0.5266, + "step": 188 + }, + { + "epoch": 0.590625, + "grad_norm": 2.5210235118865967, + "learning_rate": 3.896150441618476e-05, + "loss": 0.5079, + "step": 189 + }, + { + "epoch": 0.59375, + "grad_norm": 1.9979658126831055, + "learning_rate": 3.892759120709824e-05, + "loss": 0.2574, + "step": 190 + }, + { + "epoch": 0.596875, + "grad_norm": 2.4257137775421143, + "learning_rate": 3.8893153632074675e-05, + "loss": 0.4162, + "step": 191 + }, + { + "epoch": 0.6, + "grad_norm": 3.482635021209717, + "learning_rate": 3.88581929876693e-05, + "loss": 0.7872, + "step": 192 + }, + { + "epoch": 0.6, + "eval_VitaminC_cosine_accuracy": 0.564453125, + "eval_VitaminC_cosine_accuracy_threshold": 0.737064003944397, + "eval_VitaminC_cosine_ap": 0.5553950127875514, + "eval_VitaminC_cosine_f1": 0.6666666666666666, + "eval_VitaminC_cosine_f1_threshold": 0.312030553817749, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.5625, + "eval_VitaminC_dot_accuracy_threshold": 326.57232666015625, + "eval_VitaminC_dot_ap": 0.5370581483003721, + "eval_VitaminC_dot_f1": 0.6649006622516557, + "eval_VitaminC_dot_f1_threshold": 116.00311279296875, + "eval_VitaminC_dot_precision": 0.498015873015873, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 13.492112159729004, + "eval_VitaminC_euclidean_ap": 0.5536857778177137, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 23.840118408203125, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5625, + "eval_VitaminC_manhattan_accuracy_threshold": 306.6820983886719, + "eval_VitaminC_manhattan_ap": 0.5520101545849081, + "eval_VitaminC_manhattan_f1": 0.6666666666666666, + "eval_VitaminC_manhattan_f1_threshold": 490.146728515625, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.564453125, + "eval_VitaminC_max_accuracy_threshold": 326.57232666015625, + "eval_VitaminC_max_ap": 0.5553950127875514, + "eval_VitaminC_max_f1": 0.6666666666666666, + "eval_VitaminC_max_f1_threshold": 490.146728515625, + "eval_VitaminC_max_precision": 0.5, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5553950127875514, + "eval_sts-test_pearson_cosine": 0.8705183135475563, + "eval_sts-test_pearson_dot": 0.8575572680200927, + "eval_sts-test_pearson_euclidean": 0.894961141451468, + "eval_sts-test_pearson_manhattan": 0.8946364485546632, + "eval_sts-test_pearson_max": 0.894961141451468, + "eval_sts-test_spearman_cosine": 0.8981581293842179, + "eval_sts-test_spearman_dot": 0.8574014998383989, + "eval_sts-test_spearman_euclidean": 0.8924189591158167, + "eval_sts-test_spearman_manhattan": 0.8920942887144219, + "eval_sts-test_spearman_max": 0.8981581293842179, + "eval_vitaminc-pairs_loss": 2.066204786300659, + "eval_vitaminc-pairs_runtime": 1.8428, + "eval_vitaminc-pairs_samples_per_second": 58.608, + "eval_vitaminc-pairs_steps_per_second": 1.085, + "step": 192 + }, + { + "epoch": 0.6, + "eval_negation-triplets_loss": 0.763123095035553, + "eval_negation-triplets_runtime": 0.297, + "eval_negation-triplets_samples_per_second": 215.511, + "eval_negation-triplets_steps_per_second": 3.367, + "step": 192 + }, + { + "epoch": 0.6, + "eval_scitail-pairs-pos_loss": 0.07364190369844437, + "eval_scitail-pairs-pos_runtime": 0.3662, + "eval_scitail-pairs-pos_samples_per_second": 147.451, + "eval_scitail-pairs-pos_steps_per_second": 2.731, + "step": 192 + }, + { + "epoch": 0.6, + "eval_xsum-pairs_loss": 0.06735075265169144, + "eval_xsum-pairs_runtime": 2.8409, + "eval_xsum-pairs_samples_per_second": 45.056, + "eval_xsum-pairs_steps_per_second": 0.704, + "step": 192 + }, + { + "epoch": 0.6, + "eval_sciq_pairs_loss": 0.01930728368461132, + "eval_sciq_pairs_runtime": 3.6003, + "eval_sciq_pairs_samples_per_second": 35.552, + "eval_sciq_pairs_steps_per_second": 0.556, + "step": 192 + }, + { + "epoch": 0.6, + "eval_qasc_pairs_loss": 0.11278136074542999, + "eval_qasc_pairs_runtime": 0.5997, + "eval_qasc_pairs_samples_per_second": 213.437, + "eval_qasc_pairs_steps_per_second": 3.335, + "step": 192 + }, + { + "epoch": 0.6, + "eval_openbookqa_pairs_loss": 0.7505559921264648, + "eval_openbookqa_pairs_runtime": 0.5774, + "eval_openbookqa_pairs_samples_per_second": 221.691, + "eval_openbookqa_pairs_steps_per_second": 3.464, + "step": 192 + }, + { + "epoch": 0.6, + "eval_msmarco_pairs_loss": 0.33166375756263733, + "eval_msmarco_pairs_runtime": 1.4619, + "eval_msmarco_pairs_samples_per_second": 87.558, + "eval_msmarco_pairs_steps_per_second": 1.368, + "step": 192 + }, + { + "epoch": 0.6, + "eval_nq_pairs_loss": 0.21051406860351562, + "eval_nq_pairs_runtime": 2.858, + "eval_nq_pairs_samples_per_second": 44.786, + "eval_nq_pairs_steps_per_second": 0.7, + "step": 192 + }, + { + "epoch": 0.6, + "eval_trivia_pairs_loss": 0.7072564363479614, + "eval_trivia_pairs_runtime": 4.3854, + "eval_trivia_pairs_samples_per_second": 29.187, + "eval_trivia_pairs_steps_per_second": 0.456, + "step": 192 + }, + { + "epoch": 0.6, + "eval_gooaq_pairs_loss": 0.3748788833618164, + "eval_gooaq_pairs_runtime": 1.0024, + "eval_gooaq_pairs_samples_per_second": 127.692, + "eval_gooaq_pairs_steps_per_second": 1.995, + "step": 192 + }, + { + "epoch": 0.6, + "eval_paws-pos_loss": 0.025185449048876762, + "eval_paws-pos_runtime": 0.6844, + "eval_paws-pos_samples_per_second": 187.016, + "eval_paws-pos_steps_per_second": 2.922, + "step": 192 + }, + { + "epoch": 0.603125, + "grad_norm": 1.527544617652893, + "learning_rate": 3.882271059013064e-05, + "loss": 0.2606, + "step": 193 + }, + { + "epoch": 0.60625, + "grad_norm": 3.647446870803833, + "learning_rate": 3.878670777535087e-05, + "loss": 0.8808, + "step": 194 + }, + { + "epoch": 0.609375, + "grad_norm": 3.806488275527954, + "learning_rate": 3.875018589881564e-05, + "loss": 0.7685, + "step": 195 + }, + { + "epoch": 0.6125, + "grad_norm": 2.9896490573883057, + "learning_rate": 3.871314633555296e-05, + "loss": 0.7186, + "step": 196 + }, + { + "epoch": 0.615625, + "grad_norm": 1.31754732131958, + "learning_rate": 3.8675590480081455e-05, + "loss": 0.1147, + "step": 197 + }, + { + "epoch": 0.61875, + "grad_norm": 2.025834798812866, + "learning_rate": 3.863751974635784e-05, + "loss": 0.2816, + "step": 198 + }, + { + "epoch": 0.621875, + "grad_norm": 2.5674166679382324, + "learning_rate": 3.8598935567723734e-05, + "loss": 0.506, + "step": 199 + }, + { + "epoch": 0.625, + "grad_norm": 3.270737648010254, + "learning_rate": 3.8559839396851656e-05, + "loss": 0.5699, + "step": 200 + }, + { + "epoch": 0.628125, + "grad_norm": 1.6074001789093018, + "learning_rate": 3.852023270569033e-05, + "loss": 0.2746, + "step": 201 + }, + { + "epoch": 0.63125, + "grad_norm": 3.736549139022827, + "learning_rate": 3.8480116985409306e-05, + "loss": 0.7131, + "step": 202 + }, + { + "epoch": 0.634375, + "grad_norm": 3.9329938888549805, + "learning_rate": 3.843949374634278e-05, + "loss": 0.9307, + "step": 203 + }, + { + "epoch": 0.6375, + "grad_norm": 3.110591173171997, + "learning_rate": 3.839836451793273e-05, + "loss": 0.6033, + "step": 204 + }, + { + "epoch": 0.640625, + "grad_norm": 3.889007091522217, + "learning_rate": 3.8356730848671374e-05, + "loss": 0.7203, + "step": 205 + }, + { + "epoch": 0.64375, + "grad_norm": 3.2738683223724365, + "learning_rate": 3.8314594306042813e-05, + "loss": 0.7422, + "step": 206 + }, + { + "epoch": 0.646875, + "grad_norm": 3.077531099319458, + "learning_rate": 3.827195647646407e-05, + "loss": 0.6955, + "step": 207 + }, + { + "epoch": 0.65, + "grad_norm": 3.336914539337158, + "learning_rate": 3.822881896522533e-05, + "loss": 0.7139, + "step": 208 + }, + { + "epoch": 0.653125, + "grad_norm": 2.866854429244995, + "learning_rate": 3.818518339642951e-05, + "loss": 0.4741, + "step": 209 + }, + { + "epoch": 0.65625, + "grad_norm": 1.8859411478042603, + "learning_rate": 3.81410514129311e-05, + "loss": 0.2658, + "step": 210 + }, + { + "epoch": 0.659375, + "grad_norm": 2.938387870788574, + "learning_rate": 3.809642467627435e-05, + "loss": 0.6033, + "step": 211 + }, + { + "epoch": 0.6625, + "grad_norm": 3.269779920578003, + "learning_rate": 3.805130486663068e-05, + "loss": 0.7776, + "step": 212 + }, + { + "epoch": 0.665625, + "grad_norm": 2.8948724269866943, + "learning_rate": 3.800569368273539e-05, + "loss": 0.6791, + "step": 213 + }, + { + "epoch": 0.66875, + "grad_norm": 2.962749719619751, + "learning_rate": 3.795959284182381e-05, + "loss": 0.4367, + "step": 214 + }, + { + "epoch": 0.671875, + "grad_norm": 3.2313294410705566, + "learning_rate": 3.791300407956651e-05, + "loss": 0.7212, + "step": 215 + }, + { + "epoch": 0.675, + "grad_norm": 3.182274580001831, + "learning_rate": 3.7865929150004086e-05, + "loss": 0.7797, + "step": 216 + }, + { + "epoch": 0.678125, + "grad_norm": 2.7317817211151123, + "learning_rate": 3.781836982548101e-05, + "loss": 0.4547, + "step": 217 + }, + { + "epoch": 0.68125, + "grad_norm": 3.443126916885376, + "learning_rate": 3.777032789657898e-05, + "loss": 0.6771, + "step": 218 + }, + { + "epoch": 0.684375, + "grad_norm": 2.923877000808716, + "learning_rate": 3.772180517204946e-05, + "loss": 0.5488, + "step": 219 + }, + { + "epoch": 0.6875, + "grad_norm": 2.897601366043091, + "learning_rate": 3.767280347874561e-05, + "loss": 0.7352, + "step": 220 + }, + { + "epoch": 0.690625, + "grad_norm": 4.06088924407959, + "learning_rate": 3.762332466155348e-05, + "loss": 0.9567, + "step": 221 + }, + { + "epoch": 0.69375, + "grad_norm": 2.582475423812866, + "learning_rate": 3.7573370583322575e-05, + "loss": 0.4274, + "step": 222 + }, + { + "epoch": 0.696875, + "grad_norm": 3.7177348136901855, + "learning_rate": 3.7522943124795706e-05, + "loss": 0.7653, + "step": 223 + }, + { + "epoch": 0.7, + "grad_norm": 2.9613823890686035, + "learning_rate": 3.7472044184538186e-05, + "loss": 0.5672, + "step": 224 + }, + { + "epoch": 0.7, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.8318229913711548, + "eval_VitaminC_cosine_ap": 0.5483869647391425, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.2898828089237213, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 280.6613464355469, + "eval_VitaminC_dot_ap": 0.5352389087249884, + "eval_VitaminC_dot_f1": 0.6666666666666667, + "eval_VitaminC_dot_f1_threshold": 127.79656982421875, + "eval_VitaminC_dot_precision": 0.501002004008016, + "eval_VitaminC_dot_recall": 0.9960159362549801, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 15.362771987915039, + "eval_VitaminC_euclidean_ap": 0.5487471191186046, + "eval_VitaminC_euclidean_f1": 0.6657789613848203, + "eval_VitaminC_euclidean_f1_threshold": 23.0285587310791, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 0.9960159362549801, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 304.9786376953125, + "eval_VitaminC_manhattan_ap": 0.5448852224007886, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 503.7974548339844, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 304.9786376953125, + "eval_VitaminC_max_ap": 0.5487471191186046, + "eval_VitaminC_max_f1": 0.6666666666666667, + "eval_VitaminC_max_f1_threshold": 503.7974548339844, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5487471191186046, + "eval_sts-test_pearson_cosine": 0.8727242216490746, + "eval_sts-test_pearson_dot": 0.8620679649117718, + "eval_sts-test_pearson_euclidean": 0.8961291746213003, + "eval_sts-test_pearson_manhattan": 0.8961616445842001, + "eval_sts-test_pearson_max": 0.8961616445842001, + "eval_sts-test_spearman_cosine": 0.9004602237727143, + "eval_sts-test_spearman_dot": 0.8617584826474656, + "eval_sts-test_spearman_euclidean": 0.8945701970021624, + "eval_sts-test_spearman_manhattan": 0.8942019836234342, + "eval_sts-test_spearman_max": 0.9004602237727143, + "eval_vitaminc-pairs_loss": 2.07513689994812, + "eval_vitaminc-pairs_runtime": 1.8489, + "eval_vitaminc-pairs_samples_per_second": 58.414, + "eval_vitaminc-pairs_steps_per_second": 1.082, + "step": 224 + }, + { + "epoch": 0.7, + "eval_negation-triplets_loss": 0.7822766900062561, + "eval_negation-triplets_runtime": 0.2948, + "eval_negation-triplets_samples_per_second": 217.093, + "eval_negation-triplets_steps_per_second": 3.392, + "step": 224 + }, + { + "epoch": 0.7, + "eval_scitail-pairs-pos_loss": 0.084584079682827, + "eval_scitail-pairs-pos_runtime": 0.365, + "eval_scitail-pairs-pos_samples_per_second": 147.944, + "eval_scitail-pairs-pos_steps_per_second": 2.74, + "step": 224 + }, + { + "epoch": 0.7, + "eval_xsum-pairs_loss": 0.05927089601755142, + "eval_xsum-pairs_runtime": 2.8461, + "eval_xsum-pairs_samples_per_second": 44.974, + "eval_xsum-pairs_steps_per_second": 0.703, + "step": 224 + }, + { + "epoch": 0.7, + "eval_sciq_pairs_loss": 0.019030971452593803, + "eval_sciq_pairs_runtime": 3.6465, + "eval_sciq_pairs_samples_per_second": 35.102, + "eval_sciq_pairs_steps_per_second": 0.548, + "step": 224 + }, + { + "epoch": 0.7, + "eval_qasc_pairs_loss": 0.12519867718219757, + "eval_qasc_pairs_runtime": 0.6003, + "eval_qasc_pairs_samples_per_second": 213.235, + "eval_qasc_pairs_steps_per_second": 3.332, + "step": 224 + }, + { + "epoch": 0.7, + "eval_openbookqa_pairs_loss": 0.7141773700714111, + "eval_openbookqa_pairs_runtime": 0.5753, + "eval_openbookqa_pairs_samples_per_second": 222.508, + "eval_openbookqa_pairs_steps_per_second": 3.477, + "step": 224 + }, + { + "epoch": 0.7, + "eval_msmarco_pairs_loss": 0.3040487468242645, + "eval_msmarco_pairs_runtime": 1.4648, + "eval_msmarco_pairs_samples_per_second": 87.383, + "eval_msmarco_pairs_steps_per_second": 1.365, + "step": 224 + }, + { + "epoch": 0.7, + "eval_nq_pairs_loss": 0.1808711141347885, + "eval_nq_pairs_runtime": 2.8595, + "eval_nq_pairs_samples_per_second": 44.764, + "eval_nq_pairs_steps_per_second": 0.699, + "step": 224 + }, + { + "epoch": 0.7, + "eval_trivia_pairs_loss": 0.7160522937774658, + "eval_trivia_pairs_runtime": 4.3875, + "eval_trivia_pairs_samples_per_second": 29.174, + "eval_trivia_pairs_steps_per_second": 0.456, + "step": 224 + }, + { + "epoch": 0.7, + "eval_gooaq_pairs_loss": 0.3398577868938446, + "eval_gooaq_pairs_runtime": 1.0189, + "eval_gooaq_pairs_samples_per_second": 125.631, + "eval_gooaq_pairs_steps_per_second": 1.963, + "step": 224 + }, + { + "epoch": 0.7, + "eval_paws-pos_loss": 0.0250654686242342, + "eval_paws-pos_runtime": 0.6965, + "eval_paws-pos_samples_per_second": 183.765, + "eval_paws-pos_steps_per_second": 2.871, + "step": 224 + }, + { + "epoch": 0.703125, + "grad_norm": 2.7675271034240723, + "learning_rate": 3.742067567886634e-05, + "loss": 0.6116, + "step": 225 + }, + { + "epoch": 0.70625, + "grad_norm": 3.1136417388916016, + "learning_rate": 3.7368839541775386e-05, + "loss": 0.6484, + "step": 226 + }, + { + "epoch": 0.709375, + "grad_norm": 3.1425583362579346, + "learning_rate": 3.731653772486657e-05, + "loss": 0.669, + "step": 227 + }, + { + "epoch": 0.7125, + "grad_norm": 1.8860105276107788, + "learning_rate": 3.726377219727376e-05, + "loss": 0.263, + "step": 228 + }, + { + "epoch": 0.715625, + "grad_norm": 2.6990439891815186, + "learning_rate": 3.721054494558923e-05, + "loss": 0.6181, + "step": 229 + }, + { + "epoch": 0.71875, + "grad_norm": 3.836609363555908, + "learning_rate": 3.7156857973788926e-05, + "loss": 0.8956, + "step": 230 + }, + { + "epoch": 0.721875, + "grad_norm": 3.0837268829345703, + "learning_rate": 3.710271330315699e-05, + "loss": 0.5363, + "step": 231 + }, + { + "epoch": 0.725, + "grad_norm": 3.639112710952759, + "learning_rate": 3.704811297220967e-05, + "loss": 0.823, + "step": 232 + }, + { + "epoch": 0.728125, + "grad_norm": 3.301112651824951, + "learning_rate": 3.699305903661858e-05, + "loss": 0.7795, + "step": 233 + }, + { + "epoch": 0.73125, + "grad_norm": 2.289018154144287, + "learning_rate": 3.693755356913326e-05, + "loss": 0.3688, + "step": 234 + }, + { + "epoch": 0.734375, + "grad_norm": 2.259490966796875, + "learning_rate": 3.688159865950319e-05, + "loss": 0.3835, + "step": 235 + }, + { + "epoch": 0.7375, + "grad_norm": 2.2043821811676025, + "learning_rate": 3.6825196414399096e-05, + "loss": 0.3393, + "step": 236 + }, + { + "epoch": 0.740625, + "grad_norm": 2.6866259574890137, + "learning_rate": 3.6768348957333635e-05, + "loss": 0.4792, + "step": 237 + }, + { + "epoch": 0.74375, + "grad_norm": 2.561917304992676, + "learning_rate": 3.671105842858142e-05, + "loss": 0.3966, + "step": 238 + }, + { + "epoch": 0.746875, + "grad_norm": 2.1512343883514404, + "learning_rate": 3.6653326985098486e-05, + "loss": 0.2902, + "step": 239 + }, + { + "epoch": 0.75, + "grad_norm": 3.7423007488250732, + "learning_rate": 3.659515680044106e-05, + "loss": 0.6716, + "step": 240 + }, + { + "epoch": 0.753125, + "grad_norm": 2.6502630710601807, + "learning_rate": 3.65365500646837e-05, + "loss": 0.6783, + "step": 241 + }, + { + "epoch": 0.75625, + "grad_norm": 2.8291828632354736, + "learning_rate": 3.6477508984336886e-05, + "loss": 0.4794, + "step": 242 + }, + { + "epoch": 0.759375, + "grad_norm": 3.7910561561584473, + "learning_rate": 3.641803578226393e-05, + "loss": 0.8283, + "step": 243 + }, + { + "epoch": 0.7625, + "grad_norm": 3.3968613147735596, + "learning_rate": 3.635813269759727e-05, + "loss": 0.6875, + "step": 244 + }, + { + "epoch": 0.765625, + "grad_norm": 3.5861093997955322, + "learning_rate": 3.629780198565419e-05, + "loss": 0.8384, + "step": 245 + }, + { + "epoch": 0.76875, + "grad_norm": 2.709362030029297, + "learning_rate": 3.623704591785189e-05, + "loss": 0.5796, + "step": 246 + }, + { + "epoch": 0.771875, + "grad_norm": 2.5690431594848633, + "learning_rate": 3.6175866781622e-05, + "loss": 0.6206, + "step": 247 + }, + { + "epoch": 0.775, + "grad_norm": 3.5460782051086426, + "learning_rate": 3.611426688032439e-05, + "loss": 0.7836, + "step": 248 + }, + { + "epoch": 0.778125, + "grad_norm": 2.9132962226867676, + "learning_rate": 3.605224853316055e-05, + "loss": 0.615, + "step": 249 + }, + { + "epoch": 0.78125, + "grad_norm": 2.707908868789673, + "learning_rate": 3.5989814075086195e-05, + "loss": 0.433, + "step": 250 + }, + { + "epoch": 0.784375, + "grad_norm": 3.2124290466308594, + "learning_rate": 3.592696585672338e-05, + "loss": 0.7394, + "step": 251 + }, + { + "epoch": 0.7875, + "grad_norm": 1.3290472030639648, + "learning_rate": 3.5863706244272006e-05, + "loss": 0.1203, + "step": 252 + }, + { + "epoch": 0.790625, + "grad_norm": 3.5975258350372314, + "learning_rate": 3.580003761942073e-05, + "loss": 1.0909, + "step": 253 + }, + { + "epoch": 0.79375, + "grad_norm": 3.1402907371520996, + "learning_rate": 3.573596237925728e-05, + "loss": 0.7107, + "step": 254 + }, + { + "epoch": 0.796875, + "grad_norm": 2.1686770915985107, + "learning_rate": 3.567148293617825e-05, + "loss": 0.3464, + "step": 255 + }, + { + "epoch": 0.8, + "grad_norm": 4.112154960632324, + "learning_rate": 3.560660171779821e-05, + "loss": 0.9347, + "step": 256 + }, + { + "epoch": 0.8, + "eval_VitaminC_cosine_accuracy": 0.560546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.825050950050354, + "eval_VitaminC_cosine_ap": 0.5480281823929228, + "eval_VitaminC_cosine_f1": 0.6666666666666667, + "eval_VitaminC_cosine_f1_threshold": 0.39935123920440674, + "eval_VitaminC_cosine_precision": 0.501002004008016, + "eval_VitaminC_cosine_recall": 0.9960159362549801, + "eval_VitaminC_dot_accuracy": 0.552734375, + "eval_VitaminC_dot_accuracy_threshold": 313.59075927734375, + "eval_VitaminC_dot_ap": 0.5329984665726657, + "eval_VitaminC_dot_f1": 0.6657789613848203, + "eval_VitaminC_dot_f1_threshold": 132.71243286132812, + "eval_VitaminC_dot_precision": 0.5, + "eval_VitaminC_dot_recall": 0.9960159362549801, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 11.227453231811523, + "eval_VitaminC_euclidean_ap": 0.5496569156706412, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 22.6641788482666, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 230.89329528808594, + "eval_VitaminC_manhattan_ap": 0.545699310794812, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 483.625244140625, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 313.59075927734375, + "eval_VitaminC_max_ap": 0.5496569156706412, + "eval_VitaminC_max_f1": 0.6666666666666667, + "eval_VitaminC_max_f1_threshold": 483.625244140625, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5496569156706412, + "eval_sts-test_pearson_cosine": 0.8711436629553765, + "eval_sts-test_pearson_dot": 0.859333025320038, + "eval_sts-test_pearson_euclidean": 0.8967955144362856, + "eval_sts-test_pearson_manhattan": 0.8972988934332646, + "eval_sts-test_pearson_max": 0.8972988934332646, + "eval_sts-test_spearman_cosine": 0.8987000224084064, + "eval_sts-test_spearman_dot": 0.8599958647150425, + "eval_sts-test_spearman_euclidean": 0.8935259263175941, + "eval_sts-test_spearman_manhattan": 0.8939636384052635, + "eval_sts-test_spearman_max": 0.8987000224084064, + "eval_vitaminc-pairs_loss": 2.0033843517303467, + "eval_vitaminc-pairs_runtime": 1.8717, + "eval_vitaminc-pairs_samples_per_second": 57.701, + "eval_vitaminc-pairs_steps_per_second": 1.069, + "step": 256 + }, + { + "epoch": 0.8, + "eval_negation-triplets_loss": 0.7500894069671631, + "eval_negation-triplets_runtime": 0.2995, + "eval_negation-triplets_samples_per_second": 213.659, + "eval_negation-triplets_steps_per_second": 3.338, + "step": 256 + }, + { + "epoch": 0.8, + "eval_scitail-pairs-pos_loss": 0.07255758345127106, + "eval_scitail-pairs-pos_runtime": 0.386, + "eval_scitail-pairs-pos_samples_per_second": 139.883, + "eval_scitail-pairs-pos_steps_per_second": 2.59, + "step": 256 + }, + { + "epoch": 0.8, + "eval_xsum-pairs_loss": 0.056476954370737076, + "eval_xsum-pairs_runtime": 2.8548, + "eval_xsum-pairs_samples_per_second": 44.837, + "eval_xsum-pairs_steps_per_second": 0.701, + "step": 256 + }, + { + "epoch": 0.8, + "eval_sciq_pairs_loss": 0.01967025361955166, + "eval_sciq_pairs_runtime": 3.7336, + "eval_sciq_pairs_samples_per_second": 34.283, + "eval_sciq_pairs_steps_per_second": 0.536, + "step": 256 + }, + { + "epoch": 0.8, + "eval_qasc_pairs_loss": 0.1263607293367386, + "eval_qasc_pairs_runtime": 0.6107, + "eval_qasc_pairs_samples_per_second": 209.594, + "eval_qasc_pairs_steps_per_second": 3.275, + "step": 256 + }, + { + "epoch": 0.8, + "eval_openbookqa_pairs_loss": 0.7773354649543762, + "eval_openbookqa_pairs_runtime": 0.5903, + "eval_openbookqa_pairs_samples_per_second": 216.831, + "eval_openbookqa_pairs_steps_per_second": 3.388, + "step": 256 + }, + { + "epoch": 0.8, + "eval_msmarco_pairs_loss": 0.2844376862049103, + "eval_msmarco_pairs_runtime": 1.4722, + "eval_msmarco_pairs_samples_per_second": 86.947, + "eval_msmarco_pairs_steps_per_second": 1.359, + "step": 256 + }, + { + "epoch": 0.8, + "eval_nq_pairs_loss": 0.17289823293685913, + "eval_nq_pairs_runtime": 2.8665, + "eval_nq_pairs_samples_per_second": 44.654, + "eval_nq_pairs_steps_per_second": 0.698, + "step": 256 + }, + { + "epoch": 0.8, + "eval_trivia_pairs_loss": 0.6546728610992432, + "eval_trivia_pairs_runtime": 4.3994, + "eval_trivia_pairs_samples_per_second": 29.095, + "eval_trivia_pairs_steps_per_second": 0.455, + "step": 256 + }, + { + "epoch": 0.8, + "eval_gooaq_pairs_loss": 0.31546029448509216, + "eval_gooaq_pairs_runtime": 1.0423, + "eval_gooaq_pairs_samples_per_second": 122.802, + "eval_gooaq_pairs_steps_per_second": 1.919, + "step": 256 + }, + { + "epoch": 0.8, + "eval_paws-pos_loss": 0.02565235085785389, + "eval_paws-pos_runtime": 0.6999, + "eval_paws-pos_samples_per_second": 182.88, + "eval_paws-pos_steps_per_second": 2.857, + "step": 256 + }, + { + "epoch": 0.803125, + "grad_norm": 2.2415249347686768, + "learning_rate": 3.5541321166858384e-05, + "loss": 0.464, + "step": 257 + }, + { + "epoch": 0.80625, + "grad_norm": 2.22743821144104, + "learning_rate": 3.54756437411346e-05, + "loss": 0.4622, + "step": 258 + }, + { + "epoch": 0.809375, + "grad_norm": 2.5632565021514893, + "learning_rate": 3.5409571913344813e-05, + "loss": 0.5124, + "step": 259 + }, + { + "epoch": 0.8125, + "grad_norm": 3.4271864891052246, + "learning_rate": 3.5343108171056006e-05, + "loss": 0.832, + "step": 260 + }, + { + "epoch": 0.815625, + "grad_norm": 2.9892525672912598, + "learning_rate": 3.527625501659051e-05, + "loss": 0.6264, + "step": 261 + }, + { + "epoch": 0.81875, + "grad_norm": 2.808922529220581, + "learning_rate": 3.5209014966931795e-05, + "loss": 0.5483, + "step": 262 + }, + { + "epoch": 0.821875, + "grad_norm": 2.71504545211792, + "learning_rate": 3.514139055362974e-05, + "loss": 0.5929, + "step": 263 + }, + { + "epoch": 0.825, + "grad_norm": 2.9386723041534424, + "learning_rate": 3.507338432270528e-05, + "loss": 0.5797, + "step": 264 + }, + { + "epoch": 0.828125, + "grad_norm": 2.694045066833496, + "learning_rate": 3.500499883455457e-05, + "loss": 0.5292, + "step": 265 + }, + { + "epoch": 0.83125, + "grad_norm": 2.800262928009033, + "learning_rate": 3.493623666385258e-05, + "loss": 0.5376, + "step": 266 + }, + { + "epoch": 0.834375, + "grad_norm": 3.4821765422821045, + "learning_rate": 3.486710039945618e-05, + "loss": 0.7102, + "step": 267 + }, + { + "epoch": 0.8375, + "grad_norm": 2.337831735610962, + "learning_rate": 3.4797592644306655e-05, + "loss": 0.4605, + "step": 268 + }, + { + "epoch": 0.840625, + "grad_norm": 3.8004300594329834, + "learning_rate": 3.472771601533169e-05, + "loss": 1.2713, + "step": 269 + }, + { + "epoch": 0.84375, + "grad_norm": 3.3914785385131836, + "learning_rate": 3.465747314334687e-05, + "loss": 0.7764, + "step": 270 + }, + { + "epoch": 0.846875, + "grad_norm": 3.0255892276763916, + "learning_rate": 3.458686667295664e-05, + "loss": 0.7517, + "step": 271 + }, + { + "epoch": 0.85, + "grad_norm": 2.9869744777679443, + "learning_rate": 3.451589926245469e-05, + "loss": 0.614, + "step": 272 + }, + { + "epoch": 0.853125, + "grad_norm": 3.160764694213867, + "learning_rate": 3.444457358372391e-05, + "loss": 0.6046, + "step": 273 + }, + { + "epoch": 0.85625, + "grad_norm": 2.87579607963562, + "learning_rate": 3.43728923221358e-05, + "loss": 0.7111, + "step": 274 + }, + { + "epoch": 0.859375, + "grad_norm": 1.9325075149536133, + "learning_rate": 3.4300858176449344e-05, + "loss": 0.4401, + "step": 275 + }, + { + "epoch": 0.8625, + "grad_norm": 1.9690322875976562, + "learning_rate": 3.4228473858709404e-05, + "loss": 0.4351, + "step": 276 + }, + { + "epoch": 0.865625, + "grad_norm": 3.530524969100952, + "learning_rate": 3.4155742094144646e-05, + "loss": 0.7498, + "step": 277 + }, + { + "epoch": 0.86875, + "grad_norm": 3.321233034133911, + "learning_rate": 3.408266562106489e-05, + "loss": 0.7173, + "step": 278 + }, + { + "epoch": 0.871875, + "grad_norm": 2.2215065956115723, + "learning_rate": 3.400924719075804e-05, + "loss": 0.4696, + "step": 279 + }, + { + "epoch": 0.875, + "grad_norm": 3.1400840282440186, + "learning_rate": 3.39354895673865e-05, + "loss": 0.6246, + "step": 280 + }, + { + "epoch": 0.878125, + "grad_norm": 3.4510090351104736, + "learning_rate": 3.386139552788312e-05, + "loss": 0.7578, + "step": 281 + }, + { + "epoch": 0.88125, + "grad_norm": 2.350965976715088, + "learning_rate": 3.378696786184659e-05, + "loss": 0.3533, + "step": 282 + }, + { + "epoch": 0.884375, + "grad_norm": 3.5409841537475586, + "learning_rate": 3.3712209371436473e-05, + "loss": 0.7328, + "step": 283 + }, + { + "epoch": 0.8875, + "grad_norm": 3.4038257598876953, + "learning_rate": 3.363712287126768e-05, + "loss": 0.6964, + "step": 284 + }, + { + "epoch": 0.890625, + "grad_norm": 2.8739030361175537, + "learning_rate": 3.3561711188304516e-05, + "loss": 0.6431, + "step": 285 + }, + { + "epoch": 0.89375, + "grad_norm": 3.5703017711639404, + "learning_rate": 3.34859771617542e-05, + "loss": 0.7155, + "step": 286 + }, + { + "epoch": 0.896875, + "grad_norm": 2.76778244972229, + "learning_rate": 3.340992364296004e-05, + "loss": 0.6328, + "step": 287 + }, + { + "epoch": 0.9, + "grad_norm": 3.4040513038635254, + "learning_rate": 3.333355349529403e-05, + "loss": 0.7895, + "step": 288 + }, + { + "epoch": 0.9, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.8272709846496582, + "eval_VitaminC_cosine_ap": 0.5489140066962175, + "eval_VitaminC_cosine_f1": 0.6666666666666667, + "eval_VitaminC_cosine_f1_threshold": 0.3126052916049957, + "eval_VitaminC_cosine_precision": 0.501002004008016, + "eval_VitaminC_cosine_recall": 0.9960159362549801, + "eval_VitaminC_dot_accuracy": 0.552734375, + "eval_VitaminC_dot_accuracy_threshold": 303.1324157714844, + "eval_VitaminC_dot_ap": 0.5301817831729955, + "eval_VitaminC_dot_f1": 0.6675531914893617, + "eval_VitaminC_dot_f1_threshold": 120.97600555419922, + "eval_VitaminC_dot_precision": 0.500998003992016, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 11.374759674072266, + "eval_VitaminC_euclidean_ap": 0.551008119376775, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 24.255207061767578, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 230.6835174560547, + "eval_VitaminC_manhattan_ap": 0.5485867585720646, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 521.4428100585938, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 303.1324157714844, + "eval_VitaminC_max_ap": 0.551008119376775, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 521.4428100585938, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.551008119376775, + "eval_sts-test_pearson_cosine": 0.8726396664543798, + "eval_sts-test_pearson_dot": 0.8623668711287399, + "eval_sts-test_pearson_euclidean": 0.8950211806151552, + "eval_sts-test_pearson_manhattan": 0.8954158210085943, + "eval_sts-test_pearson_max": 0.8954158210085943, + "eval_sts-test_spearman_cosine": 0.897937595168081, + "eval_sts-test_spearman_dot": 0.8635840656046664, + "eval_sts-test_spearman_euclidean": 0.8912111673221239, + "eval_sts-test_spearman_manhattan": 0.8913994806300589, + "eval_sts-test_spearman_max": 0.897937595168081, + "eval_vitaminc-pairs_loss": 1.955485224723816, + "eval_vitaminc-pairs_runtime": 1.8698, + "eval_vitaminc-pairs_samples_per_second": 57.76, + "eval_vitaminc-pairs_steps_per_second": 1.07, + "step": 288 + }, + { + "epoch": 0.9, + "eval_negation-triplets_loss": 0.7942228317260742, + "eval_negation-triplets_runtime": 0.2979, + "eval_negation-triplets_samples_per_second": 214.818, + "eval_negation-triplets_steps_per_second": 3.357, + "step": 288 + }, + { + "epoch": 0.9, + "eval_scitail-pairs-pos_loss": 0.07541428506374359, + "eval_scitail-pairs-pos_runtime": 0.381, + "eval_scitail-pairs-pos_samples_per_second": 141.723, + "eval_scitail-pairs-pos_steps_per_second": 2.625, + "step": 288 + }, + { + "epoch": 0.9, + "eval_xsum-pairs_loss": 0.05658277869224548, + "eval_xsum-pairs_runtime": 2.8504, + "eval_xsum-pairs_samples_per_second": 44.906, + "eval_xsum-pairs_steps_per_second": 0.702, + "step": 288 + }, + { + "epoch": 0.9, + "eval_sciq_pairs_loss": 0.019849741831421852, + "eval_sciq_pairs_runtime": 3.6603, + "eval_sciq_pairs_samples_per_second": 34.97, + "eval_sciq_pairs_steps_per_second": 0.546, + "step": 288 + }, + { + "epoch": 0.9, + "eval_qasc_pairs_loss": 0.10889946669340134, + "eval_qasc_pairs_runtime": 0.6033, + "eval_qasc_pairs_samples_per_second": 212.165, + "eval_qasc_pairs_steps_per_second": 3.315, + "step": 288 + }, + { + "epoch": 0.9, + "eval_openbookqa_pairs_loss": 0.7712036967277527, + "eval_openbookqa_pairs_runtime": 0.585, + "eval_openbookqa_pairs_samples_per_second": 218.815, + "eval_openbookqa_pairs_steps_per_second": 3.419, + "step": 288 + }, + { + "epoch": 0.9, + "eval_msmarco_pairs_loss": 0.279923677444458, + "eval_msmarco_pairs_runtime": 1.4672, + "eval_msmarco_pairs_samples_per_second": 87.239, + "eval_msmarco_pairs_steps_per_second": 1.363, + "step": 288 + }, + { + "epoch": 0.9, + "eval_nq_pairs_loss": 0.18058110773563385, + "eval_nq_pairs_runtime": 2.8678, + "eval_nq_pairs_samples_per_second": 44.634, + "eval_nq_pairs_steps_per_second": 0.697, + "step": 288 + }, + { + "epoch": 0.9, + "eval_trivia_pairs_loss": 0.7307667136192322, + "eval_trivia_pairs_runtime": 4.4071, + "eval_trivia_pairs_samples_per_second": 29.044, + "eval_trivia_pairs_steps_per_second": 0.454, + "step": 288 + }, + { + "epoch": 0.9, + "eval_gooaq_pairs_loss": 0.33244821429252625, + "eval_gooaq_pairs_runtime": 1.0096, + "eval_gooaq_pairs_samples_per_second": 126.785, + "eval_gooaq_pairs_steps_per_second": 1.981, + "step": 288 + }, + { + "epoch": 0.9, + "eval_paws-pos_loss": 0.024881305173039436, + "eval_paws-pos_runtime": 0.6946, + "eval_paws-pos_samples_per_second": 184.279, + "eval_paws-pos_steps_per_second": 2.879, + "step": 288 + }, + { + "epoch": 0.903125, + "grad_norm": 2.7424654960632324, + "learning_rate": 3.325686959404907e-05, + "loss": 0.5752, + "step": 289 + }, + { + "epoch": 0.90625, + "grad_norm": 2.913073778152466, + "learning_rate": 3.3179874826330696e-05, + "loss": 0.666, + "step": 290 + }, + { + "epoch": 0.909375, + "grad_norm": 3.9191319942474365, + "learning_rate": 3.3102572090948395e-05, + "loss": 0.874, + "step": 291 + }, + { + "epoch": 0.9125, + "grad_norm": 3.086979627609253, + "learning_rate": 3.302496429830647e-05, + "loss": 0.7431, + "step": 292 + }, + { + "epoch": 0.915625, + "grad_norm": 3.0514609813690186, + "learning_rate": 3.294705437029443e-05, + "loss": 0.8332, + "step": 293 + }, + { + "epoch": 0.91875, + "grad_norm": 3.042734384536743, + "learning_rate": 3.2868845240177035e-05, + "loss": 0.7082, + "step": 294 + }, + { + "epoch": 0.921875, + "grad_norm": 3.4690864086151123, + "learning_rate": 3.2790339852483845e-05, + "loss": 0.6618, + "step": 295 + }, + { + "epoch": 0.925, + "grad_norm": 2.520153045654297, + "learning_rate": 3.2711541162898326e-05, + "loss": 0.2375, + "step": 296 + }, + { + "epoch": 0.928125, + "grad_norm": 2.9911270141601562, + "learning_rate": 3.2632452138146607e-05, + "loss": 0.5305, + "step": 297 + }, + { + "epoch": 0.93125, + "grad_norm": 2.2287964820861816, + "learning_rate": 3.255307575588577e-05, + "loss": 0.1686, + "step": 298 + }, + { + "epoch": 0.934375, + "grad_norm": 3.2477688789367676, + "learning_rate": 3.247341500459173e-05, + "loss": 0.7938, + "step": 299 + }, + { + "epoch": 0.9375, + "grad_norm": 1.9740976095199585, + "learning_rate": 3.239347288344676e-05, + "loss": 0.2629, + "step": 300 + }, + { + "epoch": 0.940625, + "grad_norm": 4.1774702072143555, + "learning_rate": 3.231325240222655e-05, + "loss": 0.973, + "step": 301 + }, + { + "epoch": 0.94375, + "grad_norm": 3.6038107872009277, + "learning_rate": 3.2232756581186846e-05, + "loss": 0.649, + "step": 302 + }, + { + "epoch": 0.946875, + "grad_norm": 2.0142273902893066, + "learning_rate": 3.215198845094984e-05, + "loss": 0.3329, + "step": 303 + }, + { + "epoch": 0.95, + "grad_norm": 3.460426092147827, + "learning_rate": 3.2070951052389975e-05, + "loss": 0.6105, + "step": 304 + }, + { + "epoch": 0.953125, + "grad_norm": 2.1552436351776123, + "learning_rate": 3.198964743651949e-05, + "loss": 0.3621, + "step": 305 + }, + { + "epoch": 0.95625, + "grad_norm": 2.6201255321502686, + "learning_rate": 3.1908080664373605e-05, + "loss": 0.5165, + "step": 306 + }, + { + "epoch": 0.959375, + "grad_norm": 3.296206474304199, + "learning_rate": 3.182625380689516e-05, + "loss": 0.6075, + "step": 307 + }, + { + "epoch": 0.9625, + "grad_norm": 2.3535473346710205, + "learning_rate": 3.17441699448191e-05, + "loss": 0.3091, + "step": 308 + }, + { + "epoch": 0.965625, + "grad_norm": 2.1077566146850586, + "learning_rate": 3.166183216855644e-05, + "loss": 0.2762, + "step": 309 + }, + { + "epoch": 0.96875, + "grad_norm": 2.85646390914917, + "learning_rate": 3.157924357807792e-05, + "loss": 0.5736, + "step": 310 + }, + { + "epoch": 0.971875, + "grad_norm": 2.4051146507263184, + "learning_rate": 3.149640728279728e-05, + "loss": 0.3876, + "step": 311 + }, + { + "epoch": 0.975, + "grad_norm": 5.062899112701416, + "learning_rate": 3.141332640145423e-05, + "loss": 1.8005, + "step": 312 + }, + { + "epoch": 0.978125, + "grad_norm": 2.969027042388916, + "learning_rate": 3.1330004061997e-05, + "loss": 0.6344, + "step": 313 + }, + { + "epoch": 0.98125, + "grad_norm": 4.5385847091674805, + "learning_rate": 3.1246443401464564e-05, + "loss": 0.9414, + "step": 314 + }, + { + "epoch": 0.984375, + "grad_norm": 2.6700010299682617, + "learning_rate": 3.116264756586856e-05, + "loss": 0.4782, + "step": 315 + }, + { + "epoch": 0.9875, + "grad_norm": 2.293757438659668, + "learning_rate": 3.107861971007485e-05, + "loss": 0.4196, + "step": 316 + }, + { + "epoch": 0.990625, + "grad_norm": 5.584008693695068, + "learning_rate": 3.099436299768471e-05, + "loss": 0.5288, + "step": 317 + }, + { + "epoch": 0.99375, + "grad_norm": 3.047480344772339, + "learning_rate": 3.0909880600915726e-05, + "loss": 0.5888, + "step": 318 + }, + { + "epoch": 0.996875, + "grad_norm": 3.148433208465576, + "learning_rate": 3.08251757004824e-05, + "loss": 0.4598, + "step": 319 + }, + { + "epoch": 1.0, + "grad_norm": 3.277242660522461, + "learning_rate": 3.074025148547635e-05, + "loss": 0.5085, + "step": 320 + }, + { + "epoch": 1.0, + "eval_VitaminC_cosine_accuracy": 0.5546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8424822092056274, + "eval_VitaminC_cosine_ap": 0.5467401178776568, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.3060212731361389, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.55859375, + "eval_VitaminC_dot_accuracy_threshold": 302.82525634765625, + "eval_VitaminC_dot_ap": 0.5313187944370502, + "eval_VitaminC_dot_f1": 0.6657824933687002, + "eval_VitaminC_dot_f1_threshold": 112.19659423828125, + "eval_VitaminC_dot_precision": 0.4990059642147117, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 13.824159622192383, + "eval_VitaminC_euclidean_ap": 0.5479307244374829, + "eval_VitaminC_euclidean_f1": 0.6649006622516557, + "eval_VitaminC_euclidean_f1_threshold": 23.69076919555664, + "eval_VitaminC_euclidean_precision": 0.498015873015873, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 292.99462890625, + "eval_VitaminC_manhattan_ap": 0.5465792848292811, + "eval_VitaminC_manhattan_f1": 0.6666666666666666, + "eval_VitaminC_manhattan_f1_threshold": 489.7302551269531, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 302.82525634765625, + "eval_VitaminC_max_ap": 0.5479307244374829, + "eval_VitaminC_max_f1": 0.6666666666666666, + "eval_VitaminC_max_f1_threshold": 489.7302551269531, + "eval_VitaminC_max_precision": 0.5, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5479307244374829, + "eval_sts-test_pearson_cosine": 0.87646365142741, + "eval_sts-test_pearson_dot": 0.8655190609079275, + "eval_sts-test_pearson_euclidean": 0.9009817964818363, + "eval_sts-test_pearson_manhattan": 0.9014432269871114, + "eval_sts-test_pearson_max": 0.9014432269871114, + "eval_sts-test_spearman_cosine": 0.9030024086785755, + "eval_sts-test_spearman_dot": 0.8673856405086042, + "eval_sts-test_spearman_euclidean": 0.8983721299161916, + "eval_sts-test_spearman_manhattan": 0.8981219256137521, + "eval_sts-test_spearman_max": 0.9030024086785755, + "eval_vitaminc-pairs_loss": 1.9213347434997559, + "eval_vitaminc-pairs_runtime": 1.866, + "eval_vitaminc-pairs_samples_per_second": 57.877, + "eval_vitaminc-pairs_steps_per_second": 1.072, + "step": 320 + }, + { + "epoch": 1.0, + "eval_negation-triplets_loss": 0.7787352204322815, + "eval_negation-triplets_runtime": 0.2979, + "eval_negation-triplets_samples_per_second": 214.834, + "eval_negation-triplets_steps_per_second": 3.357, + "step": 320 + }, + { + "epoch": 1.0, + "eval_scitail-pairs-pos_loss": 0.06892620027065277, + "eval_scitail-pairs-pos_runtime": 0.4252, + "eval_scitail-pairs-pos_samples_per_second": 126.994, + "eval_scitail-pairs-pos_steps_per_second": 2.352, + "step": 320 + }, + { + "epoch": 1.0, + "eval_xsum-pairs_loss": 0.05507522076368332, + "eval_xsum-pairs_runtime": 2.8476, + "eval_xsum-pairs_samples_per_second": 44.951, + "eval_xsum-pairs_steps_per_second": 0.702, + "step": 320 + }, + { + "epoch": 1.0, + "eval_sciq_pairs_loss": 0.020738935098052025, + "eval_sciq_pairs_runtime": 3.7008, + "eval_sciq_pairs_samples_per_second": 34.587, + "eval_sciq_pairs_steps_per_second": 0.54, + "step": 320 + }, + { + "epoch": 1.0, + "eval_qasc_pairs_loss": 0.10421090573072433, + "eval_qasc_pairs_runtime": 0.6054, + "eval_qasc_pairs_samples_per_second": 211.426, + "eval_qasc_pairs_steps_per_second": 3.304, + "step": 320 + }, + { + "epoch": 1.0, + "eval_openbookqa_pairs_loss": 0.694441020488739, + "eval_openbookqa_pairs_runtime": 0.6019, + "eval_openbookqa_pairs_samples_per_second": 212.646, + "eval_openbookqa_pairs_steps_per_second": 3.323, + "step": 320 + }, + { + "epoch": 1.0, + "eval_msmarco_pairs_loss": 0.28574398159980774, + "eval_msmarco_pairs_runtime": 1.4875, + "eval_msmarco_pairs_samples_per_second": 86.048, + "eval_msmarco_pairs_steps_per_second": 1.344, + "step": 320 + }, + { + "epoch": 1.0, + "eval_nq_pairs_loss": 0.17458948493003845, + "eval_nq_pairs_runtime": 2.8657, + "eval_nq_pairs_samples_per_second": 44.666, + "eval_nq_pairs_steps_per_second": 0.698, + "step": 320 + }, + { + "epoch": 1.0, + "eval_trivia_pairs_loss": 0.68446946144104, + "eval_trivia_pairs_runtime": 4.4, + "eval_trivia_pairs_samples_per_second": 29.091, + "eval_trivia_pairs_steps_per_second": 0.455, + "step": 320 + }, + { + "epoch": 1.0, + "eval_gooaq_pairs_loss": 0.3039962947368622, + "eval_gooaq_pairs_runtime": 1.0187, + "eval_gooaq_pairs_samples_per_second": 125.646, + "eval_gooaq_pairs_steps_per_second": 1.963, + "step": 320 + }, + { + "epoch": 1.0, + "eval_paws-pos_loss": 0.024999650195240974, + "eval_paws-pos_runtime": 0.7064, + "eval_paws-pos_samples_per_second": 181.207, + "eval_paws-pos_steps_per_second": 2.831, + "step": 320 + }, + { + "epoch": 1.003125, + "grad_norm": 2.7458887100219727, + "learning_rate": 3.065511115324628e-05, + "loss": 0.647, + "step": 321 + }, + { + "epoch": 1.00625, + "grad_norm": 2.646803140640259, + "learning_rate": 3.0569757909277566e-05, + "loss": 0.4768, + "step": 322 + }, + { + "epoch": 1.009375, + "grad_norm": 2.367361545562744, + "learning_rate": 3.048419496707161e-05, + "loss": 0.4834, + "step": 323 + }, + { + "epoch": 1.0125, + "grad_norm": 3.055002450942993, + "learning_rate": 3.0398425548024827e-05, + "loss": 0.6115, + "step": 324 + }, + { + "epoch": 1.015625, + "grad_norm": 2.0717179775238037, + "learning_rate": 3.0312452881307356e-05, + "loss": 0.4611, + "step": 325 + }, + { + "epoch": 1.01875, + "grad_norm": 2.3982598781585693, + "learning_rate": 3.022628020374152e-05, + "loss": 0.4812, + "step": 326 + }, + { + "epoch": 1.021875, + "grad_norm": 2.90179705619812, + "learning_rate": 3.013991075967992e-05, + "loss": 0.5914, + "step": 327 + }, + { + "epoch": 1.025, + "grad_norm": 3.2376556396484375, + "learning_rate": 3.00533478008833e-05, + "loss": 0.7206, + "step": 328 + }, + { + "epoch": 1.028125, + "grad_norm": 3.591564416885376, + "learning_rate": 2.996659458639815e-05, + "loss": 0.7854, + "step": 329 + }, + { + "epoch": 1.03125, + "grad_norm": 2.470400094985962, + "learning_rate": 2.9879654382433948e-05, + "loss": 0.432, + "step": 330 + }, + { + "epoch": 1.034375, + "grad_norm": 3.061913013458252, + "learning_rate": 2.979253046224024e-05, + "loss": 0.6365, + "step": 331 + }, + { + "epoch": 1.0375, + "grad_norm": 2.3621861934661865, + "learning_rate": 2.9705226105983377e-05, + "loss": 0.3754, + "step": 332 + }, + { + "epoch": 1.040625, + "grad_norm": 2.898756742477417, + "learning_rate": 2.9617744600623023e-05, + "loss": 0.5096, + "step": 333 + }, + { + "epoch": 1.04375, + "grad_norm": 2.9752399921417236, + "learning_rate": 2.9530089239788428e-05, + "loss": 0.5762, + "step": 334 + }, + { + "epoch": 1.046875, + "grad_norm": 3.2658884525299072, + "learning_rate": 2.9442263323654362e-05, + "loss": 0.6938, + "step": 335 + }, + { + "epoch": 1.05, + "grad_norm": 2.0361263751983643, + "learning_rate": 2.935427015881694e-05, + "loss": 0.343, + "step": 336 + }, + { + "epoch": 1.053125, + "grad_norm": 3.670530319213867, + "learning_rate": 2.926611305816908e-05, + "loss": 0.7258, + "step": 337 + }, + { + "epoch": 1.05625, + "grad_norm": 2.597907066345215, + "learning_rate": 2.9177795340775795e-05, + "loss": 0.4658, + "step": 338 + }, + { + "epoch": 1.059375, + "grad_norm": 3.1930811405181885, + "learning_rate": 2.9089320331749237e-05, + "loss": 0.7108, + "step": 339 + }, + { + "epoch": 1.0625, + "grad_norm": 4.060088157653809, + "learning_rate": 2.9000691362123475e-05, + "loss": 1.3076, + "step": 340 + }, + { + "epoch": 1.065625, + "grad_norm": 1.4222996234893799, + "learning_rate": 2.8911911768729136e-05, + "loss": 0.2397, + "step": 341 + }, + { + "epoch": 1.06875, + "grad_norm": 2.6759979724884033, + "learning_rate": 2.8822984894067722e-05, + "loss": 0.4853, + "step": 342 + }, + { + "epoch": 1.071875, + "grad_norm": 3.4097981452941895, + "learning_rate": 2.8733914086185807e-05, + "loss": 0.741, + "step": 343 + }, + { + "epoch": 1.075, + "grad_norm": 2.869738817214966, + "learning_rate": 2.8644702698548962e-05, + "loss": 0.6066, + "step": 344 + }, + { + "epoch": 1.078125, + "grad_norm": 3.412572145462036, + "learning_rate": 2.8555354089915514e-05, + "loss": 0.6838, + "step": 345 + }, + { + "epoch": 1.08125, + "grad_norm": 2.155133008956909, + "learning_rate": 2.846587162421007e-05, + "loss": 0.4393, + "step": 346 + }, + { + "epoch": 1.084375, + "grad_norm": 2.3955204486846924, + "learning_rate": 2.837625867039689e-05, + "loss": 0.4102, + "step": 347 + }, + { + "epoch": 1.0875, + "grad_norm": 2.5801889896392822, + "learning_rate": 2.8286518602353047e-05, + "loss": 0.4947, + "step": 348 + }, + { + "epoch": 1.090625, + "grad_norm": 2.63447904586792, + "learning_rate": 2.819665479874137e-05, + "loss": 0.5212, + "step": 349 + }, + { + "epoch": 1.09375, + "grad_norm": 2.7823500633239746, + "learning_rate": 2.8106670642883283e-05, + "loss": 0.6889, + "step": 350 + }, + { + "epoch": 1.096875, + "grad_norm": 2.979808807373047, + "learning_rate": 2.8016569522631384e-05, + "loss": 0.625, + "step": 351 + }, + { + "epoch": 1.1, + "grad_norm": 2.9141488075256348, + "learning_rate": 2.792635483024193e-05, + "loss": 0.5093, + "step": 352 + }, + { + "epoch": 1.1, + "eval_VitaminC_cosine_accuracy": 0.5546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8290125131607056, + "eval_VitaminC_cosine_ap": 0.5484962367283152, + "eval_VitaminC_cosine_f1": 0.6666666666666666, + "eval_VitaminC_cosine_f1_threshold": 0.3529857099056244, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.548828125, + "eval_VitaminC_dot_accuracy_threshold": 324.3284606933594, + "eval_VitaminC_dot_ap": 0.5323604009341977, + "eval_VitaminC_dot_f1": 0.6666666666666667, + "eval_VitaminC_dot_f1_threshold": 137.8323211669922, + "eval_VitaminC_dot_precision": 0.501002004008016, + "eval_VitaminC_dot_recall": 0.9960159362549801, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 13.973267555236816, + "eval_VitaminC_euclidean_ap": 0.5488900714831766, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 22.846126556396484, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 292.48834228515625, + "eval_VitaminC_manhattan_ap": 0.5472615547862266, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 487.93536376953125, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 324.3284606933594, + "eval_VitaminC_max_ap": 0.5488900714831766, + "eval_VitaminC_max_f1": 0.6666666666666667, + "eval_VitaminC_max_f1_threshold": 487.93536376953125, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5488900714831766, + "eval_sts-test_pearson_cosine": 0.8777529500191548, + "eval_sts-test_pearson_dot": 0.8689529679551734, + "eval_sts-test_pearson_euclidean": 0.8997770430839387, + "eval_sts-test_pearson_manhattan": 0.8993770557804839, + "eval_sts-test_pearson_max": 0.8997770430839387, + "eval_sts-test_spearman_cosine": 0.9027963738711295, + "eval_sts-test_spearman_dot": 0.8692104626943614, + "eval_sts-test_spearman_euclidean": 0.897084054359563, + "eval_sts-test_spearman_manhattan": 0.8970093645043006, + "eval_sts-test_spearman_max": 0.9027963738711295, + "eval_vitaminc-pairs_loss": 1.9221601486206055, + "eval_vitaminc-pairs_runtime": 1.8539, + "eval_vitaminc-pairs_samples_per_second": 58.254, + "eval_vitaminc-pairs_steps_per_second": 1.079, + "step": 352 + }, + { + "epoch": 1.1, + "eval_negation-triplets_loss": 0.7761179208755493, + "eval_negation-triplets_runtime": 0.2931, + "eval_negation-triplets_samples_per_second": 218.388, + "eval_negation-triplets_steps_per_second": 3.412, + "step": 352 + }, + { + "epoch": 1.1, + "eval_scitail-pairs-pos_loss": 0.08009649068117142, + "eval_scitail-pairs-pos_runtime": 0.3758, + "eval_scitail-pairs-pos_samples_per_second": 143.684, + "eval_scitail-pairs-pos_steps_per_second": 2.661, + "step": 352 + }, + { + "epoch": 1.1, + "eval_xsum-pairs_loss": 0.062557153403759, + "eval_xsum-pairs_runtime": 2.8489, + "eval_xsum-pairs_samples_per_second": 44.93, + "eval_xsum-pairs_steps_per_second": 0.702, + "step": 352 + }, + { + "epoch": 1.1, + "eval_sciq_pairs_loss": 0.019746748730540276, + "eval_sciq_pairs_runtime": 3.6515, + "eval_sciq_pairs_samples_per_second": 35.054, + "eval_sciq_pairs_steps_per_second": 0.548, + "step": 352 + }, + { + "epoch": 1.1, + "eval_qasc_pairs_loss": 0.10993637144565582, + "eval_qasc_pairs_runtime": 0.6014, + "eval_qasc_pairs_samples_per_second": 212.82, + "eval_qasc_pairs_steps_per_second": 3.325, + "step": 352 + }, + { + "epoch": 1.1, + "eval_openbookqa_pairs_loss": 0.7048032879829407, + "eval_openbookqa_pairs_runtime": 0.5788, + "eval_openbookqa_pairs_samples_per_second": 221.148, + "eval_openbookqa_pairs_steps_per_second": 3.455, + "step": 352 + }, + { + "epoch": 1.1, + "eval_msmarco_pairs_loss": 0.27703118324279785, + "eval_msmarco_pairs_runtime": 1.468, + "eval_msmarco_pairs_samples_per_second": 87.192, + "eval_msmarco_pairs_steps_per_second": 1.362, + "step": 352 + }, + { + "epoch": 1.1, + "eval_nq_pairs_loss": 0.1819453090429306, + "eval_nq_pairs_runtime": 2.8689, + "eval_nq_pairs_samples_per_second": 44.616, + "eval_nq_pairs_steps_per_second": 0.697, + "step": 352 + }, + { + "epoch": 1.1, + "eval_trivia_pairs_loss": 0.687531054019928, + "eval_trivia_pairs_runtime": 4.399, + "eval_trivia_pairs_samples_per_second": 29.098, + "eval_trivia_pairs_steps_per_second": 0.455, + "step": 352 + }, + { + "epoch": 1.1, + "eval_gooaq_pairs_loss": 0.30321064591407776, + "eval_gooaq_pairs_runtime": 1.0175, + "eval_gooaq_pairs_samples_per_second": 125.792, + "eval_gooaq_pairs_steps_per_second": 1.966, + "step": 352 + }, + { + "epoch": 1.1, + "eval_paws-pos_loss": 0.02436799556016922, + "eval_paws-pos_runtime": 0.7162, + "eval_paws-pos_samples_per_second": 178.711, + "eval_paws-pos_steps_per_second": 2.792, + "step": 352 + }, + { + "epoch": 1.103125, + "grad_norm": 3.3241679668426514, + "learning_rate": 2.78360299622471e-05, + "loss": 0.6242, + "step": 353 + }, + { + "epoch": 1.10625, + "grad_norm": 3.031259059906006, + "learning_rate": 2.7745598319327117e-05, + "loss": 0.7228, + "step": 354 + }, + { + "epoch": 1.109375, + "grad_norm": 2.223773956298828, + "learning_rate": 2.7655063306182235e-05, + "loss": 0.3717, + "step": 355 + }, + { + "epoch": 1.1125, + "grad_norm": 2.281268835067749, + "learning_rate": 2.7564428331404524e-05, + "loss": 0.3442, + "step": 356 + }, + { + "epoch": 1.115625, + "grad_norm": 3.040951728820801, + "learning_rate": 2.7473696807349552e-05, + "loss": 0.649, + "step": 357 + }, + { + "epoch": 1.11875, + "grad_norm": 2.3970398902893066, + "learning_rate": 2.738287215000792e-05, + "loss": 0.3935, + "step": 358 + }, + { + "epoch": 1.121875, + "grad_norm": 2.8858048915863037, + "learning_rate": 2.7291957778876656e-05, + "loss": 0.6131, + "step": 359 + }, + { + "epoch": 1.125, + "grad_norm": 2.974828004837036, + "learning_rate": 2.7200957116830426e-05, + "loss": 0.5322, + "step": 360 + }, + { + "epoch": 1.128125, + "grad_norm": 1.7254366874694824, + "learning_rate": 2.7109873589992745e-05, + "loss": 0.2073, + "step": 361 + }, + { + "epoch": 1.13125, + "grad_norm": 2.895080804824829, + "learning_rate": 2.7018710627606894e-05, + "loss": 0.6735, + "step": 362 + }, + { + "epoch": 1.134375, + "grad_norm": 3.014303207397461, + "learning_rate": 2.69274716619069e-05, + "loss": 0.7604, + "step": 363 + }, + { + "epoch": 1.1375, + "grad_norm": 2.703094005584717, + "learning_rate": 2.6836160127988247e-05, + "loss": 0.6165, + "step": 364 + }, + { + "epoch": 1.140625, + "grad_norm": 1.903054118156433, + "learning_rate": 2.6744779463678576e-05, + "loss": 0.1963, + "step": 365 + }, + { + "epoch": 1.14375, + "grad_norm": 1.694141149520874, + "learning_rate": 2.665333310940825e-05, + "loss": 0.1668, + "step": 366 + }, + { + "epoch": 1.146875, + "grad_norm": 2.7038228511810303, + "learning_rate": 2.6561824508080824e-05, + "loss": 0.5055, + "step": 367 + }, + { + "epoch": 1.15, + "grad_norm": 2.6325740814208984, + "learning_rate": 2.6470257104943417e-05, + "loss": 0.4919, + "step": 368 + }, + { + "epoch": 1.153125, + "grad_norm": 3.161851167678833, + "learning_rate": 2.6378634347456996e-05, + "loss": 0.7166, + "step": 369 + }, + { + "epoch": 1.15625, + "grad_norm": 2.4141595363616943, + "learning_rate": 2.6286959685166603e-05, + "loss": 0.444, + "step": 370 + }, + { + "epoch": 1.159375, + "grad_norm": 3.2262306213378906, + "learning_rate": 2.6195236569571454e-05, + "loss": 0.6237, + "step": 371 + }, + { + "epoch": 1.1625, + "grad_norm": 2.130065441131592, + "learning_rate": 2.6103468453995017e-05, + "loss": 0.4197, + "step": 372 + }, + { + "epoch": 1.165625, + "grad_norm": 2.9710662364959717, + "learning_rate": 2.601165879345496e-05, + "loss": 0.5569, + "step": 373 + }, + { + "epoch": 1.16875, + "grad_norm": 2.55246901512146, + "learning_rate": 2.591981104453313e-05, + "loss": 0.5274, + "step": 374 + }, + { + "epoch": 1.171875, + "grad_norm": 2.84503436088562, + "learning_rate": 2.5827928665245356e-05, + "loss": 0.6259, + "step": 375 + }, + { + "epoch": 1.175, + "grad_norm": 3.342602491378784, + "learning_rate": 2.5736015114911275e-05, + "loss": 0.7696, + "step": 376 + }, + { + "epoch": 1.178125, + "grad_norm": 2.747089147567749, + "learning_rate": 2.5644073854024117e-05, + "loss": 0.6437, + "step": 377 + }, + { + "epoch": 1.18125, + "grad_norm": 2.5642967224121094, + "learning_rate": 2.5552108344120387e-05, + "loss": 0.5067, + "step": 378 + }, + { + "epoch": 1.184375, + "grad_norm": 2.4318668842315674, + "learning_rate": 2.546012204764955e-05, + "loss": 0.3927, + "step": 379 + }, + { + "epoch": 1.1875, + "grad_norm": 2.7380220890045166, + "learning_rate": 2.536811842784369e-05, + "loss": 0.4557, + "step": 380 + }, + { + "epoch": 1.190625, + "grad_norm": 2.0136771202087402, + "learning_rate": 2.5276100948587075e-05, + "loss": 0.2425, + "step": 381 + }, + { + "epoch": 1.19375, + "grad_norm": 1.7208062410354614, + "learning_rate": 2.51840730742858e-05, + "loss": 0.1677, + "step": 382 + }, + { + "epoch": 1.196875, + "grad_norm": 1.7164028882980347, + "learning_rate": 2.5092038269737324e-05, + "loss": 0.3555, + "step": 383 + }, + { + "epoch": 1.2, + "grad_norm": 3.0403032302856445, + "learning_rate": 2.5e-05, + "loss": 0.8643, + "step": 384 + }, + { + "epoch": 1.2, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.8228827118873596, + "eval_VitaminC_cosine_ap": 0.5496046521684337, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.2927078902721405, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 303.5928649902344, + "eval_VitaminC_dot_ap": 0.5333968837571262, + "eval_VitaminC_dot_f1": 0.6657824933687002, + "eval_VitaminC_dot_f1_threshold": 99.95751953125, + "eval_VitaminC_dot_precision": 0.4990059642147117, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.552734375, + "eval_VitaminC_euclidean_accuracy_threshold": 12.029778480529785, + "eval_VitaminC_euclidean_ap": 0.5497621377316283, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 23.023883819580078, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 239.0825653076172, + "eval_VitaminC_manhattan_ap": 0.550887748657308, + "eval_VitaminC_manhattan_f1": 0.6666666666666666, + "eval_VitaminC_manhattan_f1_threshold": 484.42718505859375, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 303.5928649902344, + "eval_VitaminC_max_ap": 0.550887748657308, + "eval_VitaminC_max_f1": 0.6666666666666666, + "eval_VitaminC_max_f1_threshold": 484.42718505859375, + "eval_VitaminC_max_precision": 0.5, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.550887748657308, + "eval_sts-test_pearson_cosine": 0.8759930670182294, + "eval_sts-test_pearson_dot": 0.8657397744839983, + "eval_sts-test_pearson_euclidean": 0.9011306400734879, + "eval_sts-test_pearson_manhattan": 0.9011853213795427, + "eval_sts-test_pearson_max": 0.9011853213795427, + "eval_sts-test_spearman_cosine": 0.9034707306290366, + "eval_sts-test_spearman_dot": 0.868673716065233, + "eval_sts-test_spearman_euclidean": 0.8986341933028996, + "eval_sts-test_spearman_manhattan": 0.8983098809115962, + "eval_sts-test_spearman_max": 0.9034707306290366, + "eval_vitaminc-pairs_loss": 1.8801089525222778, + "eval_vitaminc-pairs_runtime": 1.8688, + "eval_vitaminc-pairs_samples_per_second": 57.791, + "eval_vitaminc-pairs_steps_per_second": 1.07, + "step": 384 + }, + { + "epoch": 1.2, + "eval_negation-triplets_loss": 0.7317898273468018, + "eval_negation-triplets_runtime": 0.3021, + "eval_negation-triplets_samples_per_second": 211.884, + "eval_negation-triplets_steps_per_second": 3.311, + "step": 384 + }, + { + "epoch": 1.2, + "eval_scitail-pairs-pos_loss": 0.07107817381620407, + "eval_scitail-pairs-pos_runtime": 0.3882, + "eval_scitail-pairs-pos_samples_per_second": 139.106, + "eval_scitail-pairs-pos_steps_per_second": 2.576, + "step": 384 + }, + { + "epoch": 1.2, + "eval_xsum-pairs_loss": 0.05828472599387169, + "eval_xsum-pairs_runtime": 2.853, + "eval_xsum-pairs_samples_per_second": 44.865, + "eval_xsum-pairs_steps_per_second": 0.701, + "step": 384 + }, + { + "epoch": 1.2, + "eval_sciq_pairs_loss": 0.019503507763147354, + "eval_sciq_pairs_runtime": 3.7158, + "eval_sciq_pairs_samples_per_second": 34.448, + "eval_sciq_pairs_steps_per_second": 0.538, + "step": 384 + }, + { + "epoch": 1.2, + "eval_qasc_pairs_loss": 0.11732859164476395, + "eval_qasc_pairs_runtime": 0.605, + "eval_qasc_pairs_samples_per_second": 211.578, + "eval_qasc_pairs_steps_per_second": 3.306, + "step": 384 + }, + { + "epoch": 1.2, + "eval_openbookqa_pairs_loss": 0.7122623324394226, + "eval_openbookqa_pairs_runtime": 0.5839, + "eval_openbookqa_pairs_samples_per_second": 219.199, + "eval_openbookqa_pairs_steps_per_second": 3.425, + "step": 384 + }, + { + "epoch": 1.2, + "eval_msmarco_pairs_loss": 0.28523409366607666, + "eval_msmarco_pairs_runtime": 1.4705, + "eval_msmarco_pairs_samples_per_second": 87.043, + "eval_msmarco_pairs_steps_per_second": 1.36, + "step": 384 + }, + { + "epoch": 1.2, + "eval_nq_pairs_loss": 0.178893581032753, + "eval_nq_pairs_runtime": 2.8648, + "eval_nq_pairs_samples_per_second": 44.681, + "eval_nq_pairs_steps_per_second": 0.698, + "step": 384 + }, + { + "epoch": 1.2, + "eval_trivia_pairs_loss": 0.636802613735199, + "eval_trivia_pairs_runtime": 4.3993, + "eval_trivia_pairs_samples_per_second": 29.096, + "eval_trivia_pairs_steps_per_second": 0.455, + "step": 384 + }, + { + "epoch": 1.2, + "eval_gooaq_pairs_loss": 0.3245222866535187, + "eval_gooaq_pairs_runtime": 1.0085, + "eval_gooaq_pairs_samples_per_second": 126.919, + "eval_gooaq_pairs_steps_per_second": 1.983, + "step": 384 + }, + { + "epoch": 1.2, + "eval_paws-pos_loss": 0.024447523057460785, + "eval_paws-pos_runtime": 0.6966, + "eval_paws-pos_samples_per_second": 183.741, + "eval_paws-pos_steps_per_second": 2.871, + "step": 384 + }, + { + "epoch": 1.203125, + "grad_norm": 3.0316460132598877, + "learning_rate": 2.4907961730262685e-05, + "loss": 0.6056, + "step": 385 + }, + { + "epoch": 1.20625, + "grad_norm": 3.3051912784576416, + "learning_rate": 2.4815926925714205e-05, + "loss": 0.5924, + "step": 386 + }, + { + "epoch": 1.209375, + "grad_norm": 2.5136680603027344, + "learning_rate": 2.4723899051412934e-05, + "loss": 0.4131, + "step": 387 + }, + { + "epoch": 1.2125, + "grad_norm": 2.1033709049224854, + "learning_rate": 2.463188157215632e-05, + "loss": 0.3347, + "step": 388 + }, + { + "epoch": 1.215625, + "grad_norm": 2.217355728149414, + "learning_rate": 2.4539877952350458e-05, + "loss": 0.4317, + "step": 389 + }, + { + "epoch": 1.21875, + "grad_norm": 1.9194687604904175, + "learning_rate": 2.444789165587962e-05, + "loss": 0.2488, + "step": 390 + }, + { + "epoch": 1.221875, + "grad_norm": 3.4252638816833496, + "learning_rate": 2.435592614597589e-05, + "loss": 0.6856, + "step": 391 + }, + { + "epoch": 1.225, + "grad_norm": 2.816314935684204, + "learning_rate": 2.4263984885088735e-05, + "loss": 0.5261, + "step": 392 + }, + { + "epoch": 1.228125, + "grad_norm": 2.5925676822662354, + "learning_rate": 2.4172071334754654e-05, + "loss": 0.4683, + "step": 393 + }, + { + "epoch": 1.23125, + "grad_norm": 3.6116645336151123, + "learning_rate": 2.4080188955466874e-05, + "loss": 1.066, + "step": 394 + }, + { + "epoch": 1.234375, + "grad_norm": 2.6395368576049805, + "learning_rate": 2.398834120654504e-05, + "loss": 0.5434, + "step": 395 + }, + { + "epoch": 1.2375, + "grad_norm": 2.5325918197631836, + "learning_rate": 2.3896531546004992e-05, + "loss": 0.4129, + "step": 396 + }, + { + "epoch": 1.240625, + "grad_norm": 1.9665679931640625, + "learning_rate": 2.380476343042855e-05, + "loss": 0.3367, + "step": 397 + }, + { + "epoch": 1.24375, + "grad_norm": 3.6547625064849854, + "learning_rate": 2.3713040314833403e-05, + "loss": 0.716, + "step": 398 + }, + { + "epoch": 1.246875, + "grad_norm": 2.7950963973999023, + "learning_rate": 2.3621365652543013e-05, + "loss": 0.4767, + "step": 399 + }, + { + "epoch": 1.25, + "grad_norm": 1.975703239440918, + "learning_rate": 2.3529742895056592e-05, + "loss": 0.3659, + "step": 400 + }, + { + "epoch": 1.253125, + "grad_norm": 2.8645551204681396, + "learning_rate": 2.3438175491919185e-05, + "loss": 0.4731, + "step": 401 + }, + { + "epoch": 1.25625, + "grad_norm": 2.649005889892578, + "learning_rate": 2.3346666890591757e-05, + "loss": 0.4562, + "step": 402 + }, + { + "epoch": 1.259375, + "grad_norm": 2.2082812786102295, + "learning_rate": 2.3255220536321427e-05, + "loss": 0.3397, + "step": 403 + }, + { + "epoch": 1.2625, + "grad_norm": 3.8959875106811523, + "learning_rate": 2.3163839872011763e-05, + "loss": 1.2082, + "step": 404 + }, + { + "epoch": 1.265625, + "grad_norm": 2.6099252700805664, + "learning_rate": 2.307252833809311e-05, + "loss": 0.6162, + "step": 405 + }, + { + "epoch": 1.26875, + "grad_norm": 2.4495608806610107, + "learning_rate": 2.298128937239311e-05, + "loss": 0.4767, + "step": 406 + }, + { + "epoch": 1.271875, + "grad_norm": 2.724579095840454, + "learning_rate": 2.2890126410007264e-05, + "loss": 0.4384, + "step": 407 + }, + { + "epoch": 1.275, + "grad_norm": 2.7510993480682373, + "learning_rate": 2.2799042883169576e-05, + "loss": 0.5368, + "step": 408 + }, + { + "epoch": 1.278125, + "grad_norm": 2.994795083999634, + "learning_rate": 2.270804222112335e-05, + "loss": 0.6885, + "step": 409 + }, + { + "epoch": 1.28125, + "grad_norm": 2.59830904006958, + "learning_rate": 2.2617127849992082e-05, + "loss": 0.4318, + "step": 410 + }, + { + "epoch": 1.284375, + "grad_norm": 2.603785276412964, + "learning_rate": 2.252630319265045e-05, + "loss": 0.5648, + "step": 411 + }, + { + "epoch": 1.2875, + "grad_norm": 1.8414777517318726, + "learning_rate": 2.2435571668595482e-05, + "loss": 0.3, + "step": 412 + }, + { + "epoch": 1.290625, + "grad_norm": 3.080265998840332, + "learning_rate": 2.2344936693817774e-05, + "loss": 0.573, + "step": 413 + }, + { + "epoch": 1.29375, + "grad_norm": 3.2287120819091797, + "learning_rate": 2.225440168067289e-05, + "loss": 0.6759, + "step": 414 + }, + { + "epoch": 1.296875, + "grad_norm": 3.5036377906799316, + "learning_rate": 2.216397003775291e-05, + "loss": 1.0739, + "step": 415 + }, + { + "epoch": 1.3, + "grad_norm": 3.4340429306030273, + "learning_rate": 2.207364516975808e-05, + "loss": 0.6794, + "step": 416 + }, + { + "epoch": 1.3, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8248050212860107, + "eval_VitaminC_cosine_ap": 0.549721039851088, + "eval_VitaminC_cosine_f1": 0.6675531914893617, + "eval_VitaminC_cosine_f1_threshold": 0.3625495135784149, + "eval_VitaminC_cosine_precision": 0.500998003992016, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.546875, + "eval_VitaminC_dot_accuracy_threshold": 315.43896484375, + "eval_VitaminC_dot_ap": 0.5352429908255126, + "eval_VitaminC_dot_f1": 0.6675531914893617, + "eval_VitaminC_dot_f1_threshold": 129.65655517578125, + "eval_VitaminC_dot_precision": 0.500998003992016, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 12.217185974121094, + "eval_VitaminC_euclidean_ap": 0.5506836806067088, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 23.268470764160156, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 228.33251953125, + "eval_VitaminC_manhattan_ap": 0.5499105636757091, + "eval_VitaminC_manhattan_f1": 0.6666666666666667, + "eval_VitaminC_manhattan_f1_threshold": 475.83892822265625, + "eval_VitaminC_manhattan_precision": 0.501002004008016, + "eval_VitaminC_manhattan_recall": 0.9960159362549801, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 315.43896484375, + "eval_VitaminC_max_ap": 0.5506836806067088, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 475.83892822265625, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5506836806067088, + "eval_sts-test_pearson_cosine": 0.8783564854148046, + "eval_sts-test_pearson_dot": 0.8688921197467538, + "eval_sts-test_pearson_euclidean": 0.901280483137533, + "eval_sts-test_pearson_manhattan": 0.9014338360947061, + "eval_sts-test_pearson_max": 0.9014338360947061, + "eval_sts-test_spearman_cosine": 0.9035353066992244, + "eval_sts-test_spearman_dot": 0.8704091252307301, + "eval_sts-test_spearman_euclidean": 0.8982903693616295, + "eval_sts-test_spearman_manhattan": 0.897955987936513, + "eval_sts-test_spearman_max": 0.9035353066992244, + "eval_vitaminc-pairs_loss": 1.8975528478622437, + "eval_vitaminc-pairs_runtime": 1.8521, + "eval_vitaminc-pairs_samples_per_second": 58.313, + "eval_vitaminc-pairs_steps_per_second": 1.08, + "step": 416 + }, + { + "epoch": 1.3, + "eval_negation-triplets_loss": 0.7549135684967041, + "eval_negation-triplets_runtime": 0.2958, + "eval_negation-triplets_samples_per_second": 216.337, + "eval_negation-triplets_steps_per_second": 3.38, + "step": 416 + }, + { + "epoch": 1.3, + "eval_scitail-pairs-pos_loss": 0.07042308896780014, + "eval_scitail-pairs-pos_runtime": 0.3833, + "eval_scitail-pairs-pos_samples_per_second": 140.89, + "eval_scitail-pairs-pos_steps_per_second": 2.609, + "step": 416 + }, + { + "epoch": 1.3, + "eval_xsum-pairs_loss": 0.054973307996988297, + "eval_xsum-pairs_runtime": 2.8675, + "eval_xsum-pairs_samples_per_second": 44.639, + "eval_xsum-pairs_steps_per_second": 0.697, + "step": 416 + }, + { + "epoch": 1.3, + "eval_sciq_pairs_loss": 0.019865412265062332, + "eval_sciq_pairs_runtime": 3.6462, + "eval_sciq_pairs_samples_per_second": 35.105, + "eval_sciq_pairs_steps_per_second": 0.549, + "step": 416 + }, + { + "epoch": 1.3, + "eval_qasc_pairs_loss": 0.10839240998029709, + "eval_qasc_pairs_runtime": 0.6001, + "eval_qasc_pairs_samples_per_second": 213.308, + "eval_qasc_pairs_steps_per_second": 3.333, + "step": 416 + }, + { + "epoch": 1.3, + "eval_openbookqa_pairs_loss": 0.709105908870697, + "eval_openbookqa_pairs_runtime": 0.5773, + "eval_openbookqa_pairs_samples_per_second": 221.728, + "eval_openbookqa_pairs_steps_per_second": 3.464, + "step": 416 + }, + { + "epoch": 1.3, + "eval_msmarco_pairs_loss": 0.2810967266559601, + "eval_msmarco_pairs_runtime": 1.4691, + "eval_msmarco_pairs_samples_per_second": 87.125, + "eval_msmarco_pairs_steps_per_second": 1.361, + "step": 416 + }, + { + "epoch": 1.3, + "eval_nq_pairs_loss": 0.16148869693279266, + "eval_nq_pairs_runtime": 2.8649, + "eval_nq_pairs_samples_per_second": 44.679, + "eval_nq_pairs_steps_per_second": 0.698, + "step": 416 + }, + { + "epoch": 1.3, + "eval_trivia_pairs_loss": 0.6475186944007874, + "eval_trivia_pairs_runtime": 4.403, + "eval_trivia_pairs_samples_per_second": 29.071, + "eval_trivia_pairs_steps_per_second": 0.454, + "step": 416 + }, + { + "epoch": 1.3, + "eval_gooaq_pairs_loss": 0.31666722893714905, + "eval_gooaq_pairs_runtime": 1.0071, + "eval_gooaq_pairs_samples_per_second": 127.1, + "eval_gooaq_pairs_steps_per_second": 1.986, + "step": 416 + }, + { + "epoch": 1.3, + "eval_paws-pos_loss": 0.025139717385172844, + "eval_paws-pos_runtime": 0.6875, + "eval_paws-pos_samples_per_second": 186.173, + "eval_paws-pos_steps_per_second": 2.909, + "step": 416 + }, + { + "epoch": 1.303125, + "grad_norm": 2.347867012023926, + "learning_rate": 2.1983430477368622e-05, + "loss": 0.4515, + "step": 417 + }, + { + "epoch": 1.30625, + "grad_norm": 2.957559585571289, + "learning_rate": 2.1893329357116726e-05, + "loss": 0.5992, + "step": 418 + }, + { + "epoch": 1.309375, + "grad_norm": 2.799776792526245, + "learning_rate": 2.180334520125863e-05, + "loss": 0.7221, + "step": 419 + }, + { + "epoch": 1.3125, + "grad_norm": 1.9639122486114502, + "learning_rate": 2.1713481397646955e-05, + "loss": 0.3968, + "step": 420 + }, + { + "epoch": 1.315625, + "grad_norm": 2.6604442596435547, + "learning_rate": 2.162374132960311e-05, + "loss": 0.4198, + "step": 421 + }, + { + "epoch": 1.31875, + "grad_norm": 2.5121357440948486, + "learning_rate": 2.1534128375789932e-05, + "loss": 0.6268, + "step": 422 + }, + { + "epoch": 1.321875, + "grad_norm": 2.014528274536133, + "learning_rate": 2.1444645910084495e-05, + "loss": 0.3976, + "step": 423 + }, + { + "epoch": 1.325, + "grad_norm": 2.713228464126587, + "learning_rate": 2.1355297301451044e-05, + "loss": 0.6003, + "step": 424 + }, + { + "epoch": 1.328125, + "grad_norm": 2.6102914810180664, + "learning_rate": 2.12660859138142e-05, + "loss": 0.4381, + "step": 425 + }, + { + "epoch": 1.33125, + "grad_norm": 3.1329894065856934, + "learning_rate": 2.1177015105932287e-05, + "loss": 0.8803, + "step": 426 + }, + { + "epoch": 1.334375, + "grad_norm": 2.3437535762786865, + "learning_rate": 2.108808823127087e-05, + "loss": 0.5635, + "step": 427 + }, + { + "epoch": 1.3375, + "grad_norm": 2.732607841491699, + "learning_rate": 2.0999308637876527e-05, + "loss": 0.5262, + "step": 428 + }, + { + "epoch": 1.340625, + "grad_norm": 2.553740978240967, + "learning_rate": 2.091067966825077e-05, + "loss": 0.6506, + "step": 429 + }, + { + "epoch": 1.34375, + "grad_norm": 2.2489590644836426, + "learning_rate": 2.0822204659224207e-05, + "loss": 0.3486, + "step": 430 + }, + { + "epoch": 1.346875, + "grad_norm": 3.328228235244751, + "learning_rate": 2.0733886941830926e-05, + "loss": 0.9099, + "step": 431 + }, + { + "epoch": 1.35, + "grad_norm": 2.4730563163757324, + "learning_rate": 2.064572984118307e-05, + "loss": 0.4199, + "step": 432 + }, + { + "epoch": 1.353125, + "grad_norm": 2.7208938598632812, + "learning_rate": 2.055773667634564e-05, + "loss": 0.4908, + "step": 433 + }, + { + "epoch": 1.35625, + "grad_norm": 2.666827440261841, + "learning_rate": 2.0469910760211578e-05, + "loss": 0.6869, + "step": 434 + }, + { + "epoch": 1.359375, + "grad_norm": 2.515075922012329, + "learning_rate": 2.038225539937698e-05, + "loss": 0.5644, + "step": 435 + }, + { + "epoch": 1.3625, + "grad_norm": 3.286777973175049, + "learning_rate": 2.0294773894016632e-05, + "loss": 0.6714, + "step": 436 + }, + { + "epoch": 1.365625, + "grad_norm": 2.477515935897827, + "learning_rate": 2.0207469537759766e-05, + "loss": 0.4976, + "step": 437 + }, + { + "epoch": 1.36875, + "grad_norm": 2.30999493598938, + "learning_rate": 2.0120345617566058e-05, + "loss": 0.4468, + "step": 438 + }, + { + "epoch": 1.371875, + "grad_norm": 2.011974573135376, + "learning_rate": 2.003340541360186e-05, + "loss": 0.3923, + "step": 439 + }, + { + "epoch": 1.375, + "grad_norm": 2.466869592666626, + "learning_rate": 1.9946652199116702e-05, + "loss": 0.5753, + "step": 440 + }, + { + "epoch": 1.378125, + "grad_norm": 2.6485002040863037, + "learning_rate": 1.986008924032009e-05, + "loss": 0.5134, + "step": 441 + }, + { + "epoch": 1.38125, + "grad_norm": 2.3299734592437744, + "learning_rate": 1.9773719796258484e-05, + "loss": 0.3858, + "step": 442 + }, + { + "epoch": 1.384375, + "grad_norm": 3.0803678035736084, + "learning_rate": 1.9687547118692646e-05, + "loss": 0.6681, + "step": 443 + }, + { + "epoch": 1.3875, + "grad_norm": 2.463984727859497, + "learning_rate": 1.960157445197518e-05, + "loss": 0.4702, + "step": 444 + }, + { + "epoch": 1.390625, + "grad_norm": 2.5118319988250732, + "learning_rate": 1.9515805032928393e-05, + "loss": 0.501, + "step": 445 + }, + { + "epoch": 1.39375, + "grad_norm": 2.670452356338501, + "learning_rate": 1.943024209072244e-05, + "loss": 0.459, + "step": 446 + }, + { + "epoch": 1.396875, + "grad_norm": 2.8598179817199707, + "learning_rate": 1.9344888846753727e-05, + "loss": 0.5879, + "step": 447 + }, + { + "epoch": 1.4, + "grad_norm": 2.703799247741699, + "learning_rate": 1.9259748514523654e-05, + "loss": 0.6276, + "step": 448 + }, + { + "epoch": 1.4, + "eval_VitaminC_cosine_accuracy": 0.5546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8286198973655701, + "eval_VitaminC_cosine_ap": 0.5491639681085214, + "eval_VitaminC_cosine_f1": 0.6666666666666667, + "eval_VitaminC_cosine_f1_threshold": 0.3577578365802765, + "eval_VitaminC_cosine_precision": 0.501002004008016, + "eval_VitaminC_cosine_recall": 0.9960159362549801, + "eval_VitaminC_dot_accuracy": 0.552734375, + "eval_VitaminC_dot_accuracy_threshold": 305.3611145019531, + "eval_VitaminC_dot_ap": 0.5346765167717246, + "eval_VitaminC_dot_f1": 0.6675531914893617, + "eval_VitaminC_dot_f1_threshold": 120.80284118652344, + "eval_VitaminC_dot_precision": 0.500998003992016, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.552734375, + "eval_VitaminC_euclidean_accuracy_threshold": 15.638836860656738, + "eval_VitaminC_euclidean_ap": 0.551666574153856, + "eval_VitaminC_euclidean_f1": 0.6675531914893617, + "eval_VitaminC_euclidean_f1_threshold": 22.694026947021484, + "eval_VitaminC_euclidean_precision": 0.500998003992016, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.55859375, + "eval_VitaminC_manhattan_accuracy_threshold": 345.3646240234375, + "eval_VitaminC_manhattan_ap": 0.5493612263798584, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 489.2554931640625, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 345.3646240234375, + "eval_VitaminC_max_ap": 0.551666574153856, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 489.2554931640625, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.551666574153856, + "eval_sts-test_pearson_cosine": 0.8776689405218701, + "eval_sts-test_pearson_dot": 0.8671968346407674, + "eval_sts-test_pearson_euclidean": 0.9011981135741202, + "eval_sts-test_pearson_manhattan": 0.901224194183572, + "eval_sts-test_pearson_max": 0.901224194183572, + "eval_sts-test_spearman_cosine": 0.9040082380296086, + "eval_sts-test_spearman_dot": 0.8686231471398608, + "eval_sts-test_spearman_euclidean": 0.8983323907960761, + "eval_sts-test_spearman_manhattan": 0.898603359683801, + "eval_sts-test_spearman_max": 0.9040082380296086, + "eval_vitaminc-pairs_loss": 1.8429665565490723, + "eval_vitaminc-pairs_runtime": 1.8248, + "eval_vitaminc-pairs_samples_per_second": 59.185, + "eval_vitaminc-pairs_steps_per_second": 1.096, + "step": 448 + }, + { + "epoch": 1.4, + "eval_negation-triplets_loss": 0.6982068419456482, + "eval_negation-triplets_runtime": 0.2935, + "eval_negation-triplets_samples_per_second": 218.06, + "eval_negation-triplets_steps_per_second": 3.407, + "step": 448 + }, + { + "epoch": 1.4, + "eval_scitail-pairs-pos_loss": 0.05678475275635719, + "eval_scitail-pairs-pos_runtime": 0.3607, + "eval_scitail-pairs-pos_samples_per_second": 149.722, + "eval_scitail-pairs-pos_steps_per_second": 2.773, + "step": 448 + }, + { + "epoch": 1.4, + "eval_xsum-pairs_loss": 0.04836395010352135, + "eval_xsum-pairs_runtime": 2.8385, + "eval_xsum-pairs_samples_per_second": 45.094, + "eval_xsum-pairs_steps_per_second": 0.705, + "step": 448 + }, + { + "epoch": 1.4, + "eval_sciq_pairs_loss": 0.019589349627494812, + "eval_sciq_pairs_runtime": 3.6678, + "eval_sciq_pairs_samples_per_second": 34.898, + "eval_sciq_pairs_steps_per_second": 0.545, + "step": 448 + }, + { + "epoch": 1.4, + "eval_qasc_pairs_loss": 0.11168085038661957, + "eval_qasc_pairs_runtime": 0.5997, + "eval_qasc_pairs_samples_per_second": 213.44, + "eval_qasc_pairs_steps_per_second": 3.335, + "step": 448 + }, + { + "epoch": 1.4, + "eval_openbookqa_pairs_loss": 0.7535218596458435, + "eval_openbookqa_pairs_runtime": 0.5778, + "eval_openbookqa_pairs_samples_per_second": 221.542, + "eval_openbookqa_pairs_steps_per_second": 3.462, + "step": 448 + }, + { + "epoch": 1.4, + "eval_msmarco_pairs_loss": 0.27821871638298035, + "eval_msmarco_pairs_runtime": 1.4582, + "eval_msmarco_pairs_samples_per_second": 87.779, + "eval_msmarco_pairs_steps_per_second": 1.372, + "step": 448 + }, + { + "epoch": 1.4, + "eval_nq_pairs_loss": 0.15653903782367706, + "eval_nq_pairs_runtime": 2.8546, + "eval_nq_pairs_samples_per_second": 44.84, + "eval_nq_pairs_steps_per_second": 0.701, + "step": 448 + }, + { + "epoch": 1.4, + "eval_trivia_pairs_loss": 0.6306825280189514, + "eval_trivia_pairs_runtime": 4.3878, + "eval_trivia_pairs_samples_per_second": 29.172, + "eval_trivia_pairs_steps_per_second": 0.456, + "step": 448 + }, + { + "epoch": 1.4, + "eval_gooaq_pairs_loss": 0.3191468417644501, + "eval_gooaq_pairs_runtime": 0.9973, + "eval_gooaq_pairs_samples_per_second": 128.345, + "eval_gooaq_pairs_steps_per_second": 2.005, + "step": 448 + }, + { + "epoch": 1.4, + "eval_paws-pos_loss": 0.024477336555719376, + "eval_paws-pos_runtime": 0.6847, + "eval_paws-pos_samples_per_second": 186.937, + "eval_paws-pos_steps_per_second": 2.921, + "step": 448 + }, + { + "epoch": 1.403125, + "grad_norm": 2.7174854278564453, + "learning_rate": 1.917482429951761e-05, + "loss": 0.5358, + "step": 449 + }, + { + "epoch": 1.40625, + "grad_norm": 2.997868061065674, + "learning_rate": 1.909011939908428e-05, + "loss": 0.8326, + "step": 450 + }, + { + "epoch": 1.409375, + "grad_norm": 2.0322728157043457, + "learning_rate": 1.90056370023153e-05, + "loss": 0.2866, + "step": 451 + }, + { + "epoch": 1.4125, + "grad_norm": 1.7908676862716675, + "learning_rate": 1.8921380289925155e-05, + "loss": 0.247, + "step": 452 + }, + { + "epoch": 1.415625, + "grad_norm": 2.5119776725769043, + "learning_rate": 1.8837352434131445e-05, + "loss": 0.519, + "step": 453 + }, + { + "epoch": 1.41875, + "grad_norm": 2.468385696411133, + "learning_rate": 1.8753556598535448e-05, + "loss": 0.4117, + "step": 454 + }, + { + "epoch": 1.421875, + "grad_norm": 2.097646713256836, + "learning_rate": 1.8669995938003007e-05, + "loss": 0.437, + "step": 455 + }, + { + "epoch": 1.425, + "grad_norm": 2.275872230529785, + "learning_rate": 1.8586673598545775e-05, + "loss": 0.3619, + "step": 456 + }, + { + "epoch": 1.428125, + "grad_norm": 2.5506107807159424, + "learning_rate": 1.8503592717202724e-05, + "loss": 0.4273, + "step": 457 + }, + { + "epoch": 1.43125, + "grad_norm": 2.219841718673706, + "learning_rate": 1.842075642192209e-05, + "loss": 0.2739, + "step": 458 + }, + { + "epoch": 1.434375, + "grad_norm": 2.54673433303833, + "learning_rate": 1.8338167831443567e-05, + "loss": 0.5714, + "step": 459 + }, + { + "epoch": 1.4375, + "grad_norm": 2.696007251739502, + "learning_rate": 1.82558300551809e-05, + "loss": 0.5485, + "step": 460 + }, + { + "epoch": 1.440625, + "grad_norm": 2.292741537094116, + "learning_rate": 1.8173746193104848e-05, + "loss": 0.4829, + "step": 461 + }, + { + "epoch": 1.44375, + "grad_norm": 2.3757193088531494, + "learning_rate": 1.80919193356264e-05, + "loss": 0.4904, + "step": 462 + }, + { + "epoch": 1.446875, + "grad_norm": 3.299426555633545, + "learning_rate": 1.801035256348051e-05, + "loss": 0.6449, + "step": 463 + }, + { + "epoch": 1.45, + "grad_norm": 3.2711825370788574, + "learning_rate": 1.7929048947610038e-05, + "loss": 0.6896, + "step": 464 + }, + { + "epoch": 1.453125, + "grad_norm": 2.4364447593688965, + "learning_rate": 1.7848011549050174e-05, + "loss": 0.4174, + "step": 465 + }, + { + "epoch": 1.45625, + "grad_norm": 2.7479851245880127, + "learning_rate": 1.776724341881316e-05, + "loss": 0.5254, + "step": 466 + }, + { + "epoch": 1.459375, + "grad_norm": 2.636861801147461, + "learning_rate": 1.7686747597773465e-05, + "loss": 0.5287, + "step": 467 + }, + { + "epoch": 1.4625, + "grad_norm": 1.8790123462677002, + "learning_rate": 1.7606527116553243e-05, + "loss": 0.2421, + "step": 468 + }, + { + "epoch": 1.465625, + "grad_norm": 2.039740800857544, + "learning_rate": 1.7526584995408277e-05, + "loss": 0.3939, + "step": 469 + }, + { + "epoch": 1.46875, + "grad_norm": 3.1484439373016357, + "learning_rate": 1.744692424411424e-05, + "loss": 0.7248, + "step": 470 + }, + { + "epoch": 1.471875, + "grad_norm": 2.309475898742676, + "learning_rate": 1.7367547861853396e-05, + "loss": 0.3479, + "step": 471 + }, + { + "epoch": 1.475, + "grad_norm": 2.4634172916412354, + "learning_rate": 1.7288458837101676e-05, + "loss": 0.472, + "step": 472 + }, + { + "epoch": 1.478125, + "grad_norm": 2.701162815093994, + "learning_rate": 1.7209660147516157e-05, + "loss": 0.5639, + "step": 473 + }, + { + "epoch": 1.48125, + "grad_norm": 2.2868311405181885, + "learning_rate": 1.713115475982297e-05, + "loss": 0.4077, + "step": 474 + }, + { + "epoch": 1.484375, + "grad_norm": 2.256727933883667, + "learning_rate": 1.705294562970558e-05, + "loss": 0.3173, + "step": 475 + }, + { + "epoch": 1.4875, + "grad_norm": 2.110504388809204, + "learning_rate": 1.6975035701693544e-05, + "loss": 0.3307, + "step": 476 + }, + { + "epoch": 1.490625, + "grad_norm": 2.267214059829712, + "learning_rate": 1.6897427909051608e-05, + "loss": 0.3761, + "step": 477 + }, + { + "epoch": 1.49375, + "grad_norm": 2.538956880569458, + "learning_rate": 1.6820125173669307e-05, + "loss": 0.5454, + "step": 478 + }, + { + "epoch": 1.496875, + "grad_norm": 1.8530148267745972, + "learning_rate": 1.6743130405950932e-05, + "loss": 0.309, + "step": 479 + }, + { + "epoch": 1.5, + "grad_norm": 2.507021903991699, + "learning_rate": 1.6666446504705974e-05, + "loss": 0.4082, + "step": 480 + }, + { + "epoch": 1.5, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.7370772361755371, + "eval_VitaminC_cosine_ap": 0.5534084328915541, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.2802589535713196, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.560546875, + "eval_VitaminC_dot_accuracy_threshold": 308.4664611816406, + "eval_VitaminC_dot_ap": 0.5342245787700969, + "eval_VitaminC_dot_f1": 0.6666666666666666, + "eval_VitaminC_dot_f1_threshold": 113.09681701660156, + "eval_VitaminC_dot_precision": 0.5, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 11.881275177001953, + "eval_VitaminC_euclidean_ap": 0.5562125403421339, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 22.934049606323242, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 235.32266235351562, + "eval_VitaminC_manhattan_ap": 0.5543420221752726, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 492.56402587890625, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 308.4664611816406, + "eval_VitaminC_max_ap": 0.5562125403421339, + "eval_VitaminC_max_f1": 0.6666666666666666, + "eval_VitaminC_max_f1_threshold": 492.56402587890625, + "eval_VitaminC_max_precision": 0.5, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5562125403421339, + "eval_sts-test_pearson_cosine": 0.8785940980445964, + "eval_sts-test_pearson_dot": 0.868901198999867, + "eval_sts-test_pearson_euclidean": 0.9008999462703983, + "eval_sts-test_pearson_manhattan": 0.9007358817864316, + "eval_sts-test_pearson_max": 0.9008999462703983, + "eval_sts-test_spearman_cosine": 0.9034113658980666, + "eval_sts-test_spearman_dot": 0.8689611981684112, + "eval_sts-test_spearman_euclidean": 0.8982906826204593, + "eval_sts-test_spearman_manhattan": 0.8980298275178087, + "eval_sts-test_spearman_max": 0.9034113658980666, + "eval_vitaminc-pairs_loss": 1.8594883680343628, + "eval_vitaminc-pairs_runtime": 1.8263, + "eval_vitaminc-pairs_samples_per_second": 59.137, + "eval_vitaminc-pairs_steps_per_second": 1.095, + "step": 480 + }, + { + "epoch": 1.5, + "eval_negation-triplets_loss": 0.7203199863433838, + "eval_negation-triplets_runtime": 0.2915, + "eval_negation-triplets_samples_per_second": 219.564, + "eval_negation-triplets_steps_per_second": 3.431, + "step": 480 + }, + { + "epoch": 1.5, + "eval_scitail-pairs-pos_loss": 0.07524989545345306, + "eval_scitail-pairs-pos_runtime": 0.37, + "eval_scitail-pairs-pos_samples_per_second": 145.939, + "eval_scitail-pairs-pos_steps_per_second": 2.703, + "step": 480 + }, + { + "epoch": 1.5, + "eval_xsum-pairs_loss": 0.04331779107451439, + "eval_xsum-pairs_runtime": 2.8387, + "eval_xsum-pairs_samples_per_second": 45.091, + "eval_xsum-pairs_steps_per_second": 0.705, + "step": 480 + }, + { + "epoch": 1.5, + "eval_sciq_pairs_loss": 0.018652573227882385, + "eval_sciq_pairs_runtime": 3.6202, + "eval_sciq_pairs_samples_per_second": 35.357, + "eval_sciq_pairs_steps_per_second": 0.552, + "step": 480 + }, + { + "epoch": 1.5, + "eval_qasc_pairs_loss": 0.10793650150299072, + "eval_qasc_pairs_runtime": 0.5983, + "eval_qasc_pairs_samples_per_second": 213.952, + "eval_qasc_pairs_steps_per_second": 3.343, + "step": 480 + }, + { + "epoch": 1.5, + "eval_openbookqa_pairs_loss": 0.6959180235862732, + "eval_openbookqa_pairs_runtime": 0.5741, + "eval_openbookqa_pairs_samples_per_second": 222.961, + "eval_openbookqa_pairs_steps_per_second": 3.484, + "step": 480 + }, + { + "epoch": 1.5, + "eval_msmarco_pairs_loss": 0.26085397601127625, + "eval_msmarco_pairs_runtime": 1.4595, + "eval_msmarco_pairs_samples_per_second": 87.699, + "eval_msmarco_pairs_steps_per_second": 1.37, + "step": 480 + }, + { + "epoch": 1.5, + "eval_nq_pairs_loss": 0.1553785651922226, + "eval_nq_pairs_runtime": 2.8659, + "eval_nq_pairs_samples_per_second": 44.663, + "eval_nq_pairs_steps_per_second": 0.698, + "step": 480 + }, + { + "epoch": 1.5, + "eval_trivia_pairs_loss": 0.6472769379615784, + "eval_trivia_pairs_runtime": 4.3924, + "eval_trivia_pairs_samples_per_second": 29.141, + "eval_trivia_pairs_steps_per_second": 0.455, + "step": 480 + }, + { + "epoch": 1.5, + "eval_gooaq_pairs_loss": 0.3059709370136261, + "eval_gooaq_pairs_runtime": 0.9999, + "eval_gooaq_pairs_samples_per_second": 128.009, + "eval_gooaq_pairs_steps_per_second": 2.0, + "step": 480 + }, + { + "epoch": 1.5, + "eval_paws-pos_loss": 0.02474558725953102, + "eval_paws-pos_runtime": 0.6798, + "eval_paws-pos_samples_per_second": 188.303, + "eval_paws-pos_steps_per_second": 2.942, + "step": 480 + }, + { + "epoch": 1.503125, + "grad_norm": 1.756934404373169, + "learning_rate": 1.6590076357039962e-05, + "loss": 0.2147, + "step": 481 + }, + { + "epoch": 1.50625, + "grad_norm": 2.775935411453247, + "learning_rate": 1.6514022838245802e-05, + "loss": 0.5614, + "step": 482 + }, + { + "epoch": 1.509375, + "grad_norm": 2.4856698513031006, + "learning_rate": 1.6438288811695494e-05, + "loss": 0.3865, + "step": 483 + }, + { + "epoch": 1.5125, + "grad_norm": 1.2785615921020508, + "learning_rate": 1.636287712873232e-05, + "loss": 0.1715, + "step": 484 + }, + { + "epoch": 1.515625, + "grad_norm": 2.2189393043518066, + "learning_rate": 1.6287790628563536e-05, + "loss": 0.3597, + "step": 485 + }, + { + "epoch": 1.51875, + "grad_norm": 2.2382972240448, + "learning_rate": 1.6213032138153418e-05, + "loss": 0.3827, + "step": 486 + }, + { + "epoch": 1.521875, + "grad_norm": 2.6651275157928467, + "learning_rate": 1.613860447211689e-05, + "loss": 0.4895, + "step": 487 + }, + { + "epoch": 1.525, + "grad_norm": 2.810739517211914, + "learning_rate": 1.60645104326135e-05, + "loss": 0.4987, + "step": 488 + }, + { + "epoch": 1.528125, + "grad_norm": 2.383479595184326, + "learning_rate": 1.599075280924197e-05, + "loss": 0.4482, + "step": 489 + }, + { + "epoch": 1.53125, + "grad_norm": 2.4470787048339844, + "learning_rate": 1.5917334378935118e-05, + "loss": 0.5808, + "step": 490 + }, + { + "epoch": 1.534375, + "grad_norm": 2.437572956085205, + "learning_rate": 1.584425790585536e-05, + "loss": 0.3916, + "step": 491 + }, + { + "epoch": 1.5375, + "grad_norm": 3.223665952682495, + "learning_rate": 1.5771526141290602e-05, + "loss": 1.0877, + "step": 492 + }, + { + "epoch": 1.540625, + "grad_norm": 2.521468162536621, + "learning_rate": 1.5699141823550662e-05, + "loss": 0.4119, + "step": 493 + }, + { + "epoch": 1.54375, + "grad_norm": 2.7671728134155273, + "learning_rate": 1.562710767786421e-05, + "loss": 0.6078, + "step": 494 + }, + { + "epoch": 1.546875, + "grad_norm": 1.7431325912475586, + "learning_rate": 1.5555426416276095e-05, + "loss": 0.2441, + "step": 495 + }, + { + "epoch": 1.55, + "grad_norm": 2.172173261642456, + "learning_rate": 1.548410073754532e-05, + "loss": 0.4769, + "step": 496 + }, + { + "epoch": 1.553125, + "grad_norm": 1.587640404701233, + "learning_rate": 1.5413133327043365e-05, + "loss": 0.218, + "step": 497 + }, + { + "epoch": 1.55625, + "grad_norm": 2.7734944820404053, + "learning_rate": 1.5342526856653133e-05, + "loss": 0.6377, + "step": 498 + }, + { + "epoch": 1.559375, + "grad_norm": 1.6427900791168213, + "learning_rate": 1.5272283984668313e-05, + "loss": 0.2391, + "step": 499 + }, + { + "epoch": 1.5625, + "grad_norm": 2.130922794342041, + "learning_rate": 1.5202407355693354e-05, + "loss": 0.3645, + "step": 500 + }, + { + "epoch": 1.565625, + "grad_norm": 2.3365015983581543, + "learning_rate": 1.5132899600543823e-05, + "loss": 0.4185, + "step": 501 + }, + { + "epoch": 1.56875, + "grad_norm": 1.7738977670669556, + "learning_rate": 1.5063763336147424e-05, + "loss": 0.3363, + "step": 502 + }, + { + "epoch": 1.571875, + "grad_norm": 1.8385276794433594, + "learning_rate": 1.4995001165445442e-05, + "loss": 0.3712, + "step": 503 + }, + { + "epoch": 1.575, + "grad_norm": 1.8053840398788452, + "learning_rate": 1.4926615677294724e-05, + "loss": 0.2995, + "step": 504 + }, + { + "epoch": 1.578125, + "grad_norm": 2.7845582962036133, + "learning_rate": 1.4858609446370264e-05, + "loss": 0.6178, + "step": 505 + }, + { + "epoch": 1.58125, + "grad_norm": 2.369316339492798, + "learning_rate": 1.4790985033068205e-05, + "loss": 0.464, + "step": 506 + }, + { + "epoch": 1.584375, + "grad_norm": 2.4763267040252686, + "learning_rate": 1.4723744983409498e-05, + "loss": 0.5694, + "step": 507 + }, + { + "epoch": 1.5875, + "grad_norm": 2.1269421577453613, + "learning_rate": 1.4656891828943997e-05, + "loss": 0.3587, + "step": 508 + }, + { + "epoch": 1.590625, + "grad_norm": 2.028308629989624, + "learning_rate": 1.4590428086655196e-05, + "loss": 0.3375, + "step": 509 + }, + { + "epoch": 1.59375, + "grad_norm": 1.3677244186401367, + "learning_rate": 1.4524356258865409e-05, + "loss": 0.1613, + "step": 510 + }, + { + "epoch": 1.596875, + "grad_norm": 1.846962571144104, + "learning_rate": 1.4458678833141626e-05, + "loss": 0.2811, + "step": 511 + }, + { + "epoch": 1.6, + "grad_norm": 2.5623536109924316, + "learning_rate": 1.4393398282201789e-05, + "loss": 0.5338, + "step": 512 + }, + { + "epoch": 1.6, + "eval_VitaminC_cosine_accuracy": 0.5625, + "eval_VitaminC_cosine_accuracy_threshold": 0.7150193452835083, + "eval_VitaminC_cosine_ap": 0.5536001409238264, + "eval_VitaminC_cosine_f1": 0.6666666666666667, + "eval_VitaminC_cosine_f1_threshold": 0.3747650980949402, + "eval_VitaminC_cosine_precision": 0.501002004008016, + "eval_VitaminC_cosine_recall": 0.9960159362549801, + "eval_VitaminC_dot_accuracy": 0.55859375, + "eval_VitaminC_dot_accuracy_threshold": 305.93060302734375, + "eval_VitaminC_dot_ap": 0.5361490037017673, + "eval_VitaminC_dot_f1": 0.6684563758389263, + "eval_VitaminC_dot_f1_threshold": 141.05189514160156, + "eval_VitaminC_dot_precision": 0.5040485829959515, + "eval_VitaminC_dot_recall": 0.9920318725099602, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 12.17225456237793, + "eval_VitaminC_euclidean_ap": 0.5553095900623441, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 23.013614654541016, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.560546875, + "eval_VitaminC_manhattan_accuracy_threshold": 306.5001220703125, + "eval_VitaminC_manhattan_ap": 0.5528524184849768, + "eval_VitaminC_manhattan_f1": 0.6675531914893617, + "eval_VitaminC_manhattan_f1_threshold": 482.4728088378906, + "eval_VitaminC_manhattan_precision": 0.500998003992016, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.5625, + "eval_VitaminC_max_accuracy_threshold": 306.5001220703125, + "eval_VitaminC_max_ap": 0.5553095900623441, + "eval_VitaminC_max_f1": 0.6684563758389263, + "eval_VitaminC_max_f1_threshold": 482.4728088378906, + "eval_VitaminC_max_precision": 0.5040485829959515, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5553095900623441, + "eval_sts-test_pearson_cosine": 0.88002263195295, + "eval_sts-test_pearson_dot": 0.8704058648822381, + "eval_sts-test_pearson_euclidean": 0.9024307031663734, + "eval_sts-test_pearson_manhattan": 0.902236666405867, + "eval_sts-test_pearson_max": 0.9024307031663734, + "eval_sts-test_spearman_cosine": 0.9043963657196562, + "eval_sts-test_spearman_dot": 0.8703829009915547, + "eval_sts-test_spearman_euclidean": 0.8986995748957924, + "eval_sts-test_spearman_manhattan": 0.8993764824755988, + "eval_sts-test_spearman_max": 0.9043963657196562, + "eval_vitaminc-pairs_loss": 1.8544398546218872, + "eval_vitaminc-pairs_runtime": 1.8317, + "eval_vitaminc-pairs_samples_per_second": 58.961, + "eval_vitaminc-pairs_steps_per_second": 1.092, + "step": 512 + }, + { + "epoch": 1.6, + "eval_negation-triplets_loss": 0.7161268591880798, + "eval_negation-triplets_runtime": 0.2916, + "eval_negation-triplets_samples_per_second": 219.445, + "eval_negation-triplets_steps_per_second": 3.429, + "step": 512 + }, + { + "epoch": 1.6, + "eval_scitail-pairs-pos_loss": 0.07522901147603989, + "eval_scitail-pairs-pos_runtime": 0.3667, + "eval_scitail-pairs-pos_samples_per_second": 147.259, + "eval_scitail-pairs-pos_steps_per_second": 2.727, + "step": 512 + }, + { + "epoch": 1.6, + "eval_xsum-pairs_loss": 0.04067877307534218, + "eval_xsum-pairs_runtime": 2.8345, + "eval_xsum-pairs_samples_per_second": 45.157, + "eval_xsum-pairs_steps_per_second": 0.706, + "step": 512 + }, + { + "epoch": 1.6, + "eval_sciq_pairs_loss": 0.01821758784353733, + "eval_sciq_pairs_runtime": 3.6099, + "eval_sciq_pairs_samples_per_second": 35.459, + "eval_sciq_pairs_steps_per_second": 0.554, + "step": 512 + }, + { + "epoch": 1.6, + "eval_qasc_pairs_loss": 0.10426162928342819, + "eval_qasc_pairs_runtime": 0.5966, + "eval_qasc_pairs_samples_per_second": 214.562, + "eval_qasc_pairs_steps_per_second": 3.353, + "step": 512 + }, + { + "epoch": 1.6, + "eval_openbookqa_pairs_loss": 0.6913560032844543, + "eval_openbookqa_pairs_runtime": 0.5728, + "eval_openbookqa_pairs_samples_per_second": 223.453, + "eval_openbookqa_pairs_steps_per_second": 3.491, + "step": 512 + }, + { + "epoch": 1.6, + "eval_msmarco_pairs_loss": 0.2564995586872101, + "eval_msmarco_pairs_runtime": 1.4587, + "eval_msmarco_pairs_samples_per_second": 87.749, + "eval_msmarco_pairs_steps_per_second": 1.371, + "step": 512 + }, + { + "epoch": 1.6, + "eval_nq_pairs_loss": 0.14494968950748444, + "eval_nq_pairs_runtime": 2.8504, + "eval_nq_pairs_samples_per_second": 44.907, + "eval_nq_pairs_steps_per_second": 0.702, + "step": 512 + }, + { + "epoch": 1.6, + "eval_trivia_pairs_loss": 0.633898913860321, + "eval_trivia_pairs_runtime": 4.3846, + "eval_trivia_pairs_samples_per_second": 29.193, + "eval_trivia_pairs_steps_per_second": 0.456, + "step": 512 + }, + { + "epoch": 1.6, + "eval_gooaq_pairs_loss": 0.29749810695648193, + "eval_gooaq_pairs_runtime": 1.0002, + "eval_gooaq_pairs_samples_per_second": 127.979, + "eval_gooaq_pairs_steps_per_second": 2.0, + "step": 512 + }, + { + "epoch": 1.6, + "eval_paws-pos_loss": 0.025082813575863838, + "eval_paws-pos_runtime": 0.6849, + "eval_paws-pos_samples_per_second": 186.893, + "eval_paws-pos_steps_per_second": 2.92, + "step": 512 + }, + { + "epoch": 1.603125, + "grad_norm": 1.237898349761963, + "learning_rate": 1.4328517063821754e-05, + "loss": 0.1862, + "step": 513 + }, + { + "epoch": 1.60625, + "grad_norm": 3.120419502258301, + "learning_rate": 1.4264037620742724e-05, + "loss": 0.6092, + "step": 514 + }, + { + "epoch": 1.609375, + "grad_norm": 2.872905969619751, + "learning_rate": 1.4199962380579275e-05, + "loss": 0.541, + "step": 515 + }, + { + "epoch": 1.6125, + "grad_norm": 2.554291248321533, + "learning_rate": 1.4136293755728e-05, + "loss": 0.5297, + "step": 516 + }, + { + "epoch": 1.615625, + "grad_norm": 0.818438708782196, + "learning_rate": 1.4073034143276623e-05, + "loss": 0.0664, + "step": 517 + }, + { + "epoch": 1.61875, + "grad_norm": 1.3617022037506104, + "learning_rate": 1.401018592491381e-05, + "loss": 0.1557, + "step": 518 + }, + { + "epoch": 1.621875, + "grad_norm": 1.975934386253357, + "learning_rate": 1.3947751466839452e-05, + "loss": 0.3281, + "step": 519 + }, + { + "epoch": 1.625, + "grad_norm": 2.3073935508728027, + "learning_rate": 1.3885733119675617e-05, + "loss": 0.3828, + "step": 520 + }, + { + "epoch": 1.628125, + "grad_norm": 1.2710379362106323, + "learning_rate": 1.382413321837801e-05, + "loss": 0.2087, + "step": 521 + }, + { + "epoch": 1.63125, + "grad_norm": 2.7534079551696777, + "learning_rate": 1.3762954082148114e-05, + "loss": 0.5306, + "step": 522 + }, + { + "epoch": 1.634375, + "grad_norm": 3.0414681434631348, + "learning_rate": 1.3702198014345816e-05, + "loss": 0.6589, + "step": 523 + }, + { + "epoch": 1.6375, + "grad_norm": 2.3352811336517334, + "learning_rate": 1.3641867302402734e-05, + "loss": 0.425, + "step": 524 + }, + { + "epoch": 1.640625, + "grad_norm": 2.76236629486084, + "learning_rate": 1.3581964217736077e-05, + "loss": 0.5026, + "step": 525 + }, + { + "epoch": 1.64375, + "grad_norm": 2.5108022689819336, + "learning_rate": 1.3522491015663117e-05, + "loss": 0.5667, + "step": 526 + }, + { + "epoch": 1.646875, + "grad_norm": 2.4024035930633545, + "learning_rate": 1.3463449935316308e-05, + "loss": 0.4748, + "step": 527 + }, + { + "epoch": 1.65, + "grad_norm": 2.772578239440918, + "learning_rate": 1.3404843199558945e-05, + "loss": 0.5094, + "step": 528 + }, + { + "epoch": 1.653125, + "grad_norm": 2.2362611293792725, + "learning_rate": 1.3346673014901517e-05, + "loss": 0.3398, + "step": 529 + }, + { + "epoch": 1.65625, + "grad_norm": 1.5350793600082397, + "learning_rate": 1.3288941571418583e-05, + "loss": 0.1932, + "step": 530 + }, + { + "epoch": 1.659375, + "grad_norm": 2.147125720977783, + "learning_rate": 1.3231651042666376e-05, + "loss": 0.4233, + "step": 531 + }, + { + "epoch": 1.6625, + "grad_norm": 2.5387678146362305, + "learning_rate": 1.3174803585600908e-05, + "loss": 0.5848, + "step": 532 + }, + { + "epoch": 1.665625, + "grad_norm": 2.3380072116851807, + "learning_rate": 1.3118401340496819e-05, + "loss": 0.5076, + "step": 533 + }, + { + "epoch": 1.66875, + "grad_norm": 2.097322463989258, + "learning_rate": 1.3062446430866749e-05, + "loss": 0.286, + "step": 534 + }, + { + "epoch": 1.671875, + "grad_norm": 2.5456178188323975, + "learning_rate": 1.3006940963381425e-05, + "loss": 0.5221, + "step": 535 + }, + { + "epoch": 1.675, + "grad_norm": 2.5779526233673096, + "learning_rate": 1.295188702779033e-05, + "loss": 0.579, + "step": 536 + }, + { + "epoch": 1.678125, + "grad_norm": 1.9412658214569092, + "learning_rate": 1.2897286696843012e-05, + "loss": 0.2717, + "step": 537 + }, + { + "epoch": 1.68125, + "grad_norm": 2.2857954502105713, + "learning_rate": 1.2843142026211081e-05, + "loss": 0.4727, + "step": 538 + }, + { + "epoch": 1.684375, + "grad_norm": 2.2698121070861816, + "learning_rate": 1.2789455054410776e-05, + "loss": 0.3777, + "step": 539 + }, + { + "epoch": 1.6875, + "grad_norm": 2.2447919845581055, + "learning_rate": 1.2736227802726247e-05, + "loss": 0.537, + "step": 540 + }, + { + "epoch": 1.690625, + "grad_norm": 3.1389870643615723, + "learning_rate": 1.268346227513343e-05, + "loss": 0.6935, + "step": 541 + }, + { + "epoch": 1.69375, + "grad_norm": 1.925352931022644, + "learning_rate": 1.2631160458224625e-05, + "loss": 0.2929, + "step": 542 + }, + { + "epoch": 1.696875, + "grad_norm": 2.683356761932373, + "learning_rate": 1.2579324321133666e-05, + "loss": 0.5495, + "step": 543 + }, + { + "epoch": 1.7, + "grad_norm": 2.3518059253692627, + "learning_rate": 1.2527955815461821e-05, + "loss": 0.3767, + "step": 544 + }, + { + "epoch": 1.7, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.7074875235557556, + "eval_VitaminC_cosine_ap": 0.5537116985905202, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.2738235890865326, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.552734375, + "eval_VitaminC_dot_accuracy_threshold": 308.73809814453125, + "eval_VitaminC_dot_ap": 0.5356558215645612, + "eval_VitaminC_dot_f1": 0.6666666666666667, + "eval_VitaminC_dot_f1_threshold": 142.89981079101562, + "eval_VitaminC_dot_precision": 0.5030425963488844, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.552734375, + "eval_VitaminC_euclidean_accuracy_threshold": 14.646638870239258, + "eval_VitaminC_euclidean_ap": 0.5553327582256045, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 23.463809967041016, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.55859375, + "eval_VitaminC_manhattan_accuracy_threshold": 310.8325500488281, + "eval_VitaminC_manhattan_ap": 0.5530353867429494, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 497.66796875, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 310.8325500488281, + "eval_VitaminC_max_ap": 0.5553327582256045, + "eval_VitaminC_max_f1": 0.6666666666666667, + "eval_VitaminC_max_f1_threshold": 497.66796875, + "eval_VitaminC_max_precision": 0.5030425963488844, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5553327582256045, + "eval_sts-test_pearson_cosine": 0.8785811955197258, + "eval_sts-test_pearson_dot": 0.8673295777318735, + "eval_sts-test_pearson_euclidean": 0.9018792837542462, + "eval_sts-test_pearson_manhattan": 0.9016741452222354, + "eval_sts-test_pearson_max": 0.9018792837542462, + "eval_sts-test_spearman_cosine": 0.9040249302501078, + "eval_sts-test_spearman_dot": 0.8683179882884328, + "eval_sts-test_spearman_euclidean": 0.8988373640296166, + "eval_sts-test_spearman_manhattan": 0.8983056295417639, + "eval_sts-test_spearman_max": 0.9040249302501078, + "eval_vitaminc-pairs_loss": 1.8782049417495728, + "eval_vitaminc-pairs_runtime": 1.831, + "eval_vitaminc-pairs_samples_per_second": 58.986, + "eval_vitaminc-pairs_steps_per_second": 1.092, + "step": 544 + }, + { + "epoch": 1.7, + "eval_negation-triplets_loss": 0.719520628452301, + "eval_negation-triplets_runtime": 0.294, + "eval_negation-triplets_samples_per_second": 217.687, + "eval_negation-triplets_steps_per_second": 3.401, + "step": 544 + }, + { + "epoch": 1.7, + "eval_scitail-pairs-pos_loss": 0.06700660288333893, + "eval_scitail-pairs-pos_runtime": 0.3659, + "eval_scitail-pairs-pos_samples_per_second": 147.579, + "eval_scitail-pairs-pos_steps_per_second": 2.733, + "step": 544 + }, + { + "epoch": 1.7, + "eval_xsum-pairs_loss": 0.03577294573187828, + "eval_xsum-pairs_runtime": 2.8359, + "eval_xsum-pairs_samples_per_second": 45.136, + "eval_xsum-pairs_steps_per_second": 0.705, + "step": 544 + }, + { + "epoch": 1.7, + "eval_sciq_pairs_loss": 0.018292119726538658, + "eval_sciq_pairs_runtime": 3.6233, + "eval_sciq_pairs_samples_per_second": 35.327, + "eval_sciq_pairs_steps_per_second": 0.552, + "step": 544 + }, + { + "epoch": 1.7, + "eval_qasc_pairs_loss": 0.10864048451185226, + "eval_qasc_pairs_runtime": 0.5959, + "eval_qasc_pairs_samples_per_second": 214.784, + "eval_qasc_pairs_steps_per_second": 3.356, + "step": 544 + }, + { + "epoch": 1.7, + "eval_openbookqa_pairs_loss": 0.696479082107544, + "eval_openbookqa_pairs_runtime": 0.5743, + "eval_openbookqa_pairs_samples_per_second": 222.885, + "eval_openbookqa_pairs_steps_per_second": 3.483, + "step": 544 + }, + { + "epoch": 1.7, + "eval_msmarco_pairs_loss": 0.24125610291957855, + "eval_msmarco_pairs_runtime": 1.4595, + "eval_msmarco_pairs_samples_per_second": 87.699, + "eval_msmarco_pairs_steps_per_second": 1.37, + "step": 544 + }, + { + "epoch": 1.7, + "eval_nq_pairs_loss": 0.15616978704929352, + "eval_nq_pairs_runtime": 2.8639, + "eval_nq_pairs_samples_per_second": 44.694, + "eval_nq_pairs_steps_per_second": 0.698, + "step": 544 + }, + { + "epoch": 1.7, + "eval_trivia_pairs_loss": 0.6436348557472229, + "eval_trivia_pairs_runtime": 4.377, + "eval_trivia_pairs_samples_per_second": 29.244, + "eval_trivia_pairs_steps_per_second": 0.457, + "step": 544 + }, + { + "epoch": 1.7, + "eval_gooaq_pairs_loss": 0.30042433738708496, + "eval_gooaq_pairs_runtime": 1.0002, + "eval_gooaq_pairs_samples_per_second": 127.981, + "eval_gooaq_pairs_steps_per_second": 2.0, + "step": 544 + }, + { + "epoch": 1.7, + "eval_paws-pos_loss": 0.02469758875668049, + "eval_paws-pos_runtime": 0.6819, + "eval_paws-pos_samples_per_second": 187.706, + "eval_paws-pos_steps_per_second": 2.933, + "step": 544 + }, + { + "epoch": 1.703125, + "grad_norm": 2.188075065612793, + "learning_rate": 1.2477056875204302e-05, + "loss": 0.4054, + "step": 545 + }, + { + "epoch": 1.70625, + "grad_norm": 2.5551207065582275, + "learning_rate": 1.242662941667743e-05, + "loss": 0.4114, + "step": 546 + }, + { + "epoch": 1.709375, + "grad_norm": 2.614218235015869, + "learning_rate": 1.2376675338446527e-05, + "loss": 0.4774, + "step": 547 + }, + { + "epoch": 1.7125, + "grad_norm": 1.4668488502502441, + "learning_rate": 1.2327196521254394e-05, + "loss": 0.1662, + "step": 548 + }, + { + "epoch": 1.715625, + "grad_norm": 2.075801372528076, + "learning_rate": 1.2278194827950544e-05, + "loss": 0.4634, + "step": 549 + }, + { + "epoch": 1.71875, + "grad_norm": 3.1399238109588623, + "learning_rate": 1.2229672103421021e-05, + "loss": 0.6514, + "step": 550 + }, + { + "epoch": 1.721875, + "grad_norm": 2.308095693588257, + "learning_rate": 1.2181630174518995e-05, + "loss": 0.3672, + "step": 551 + }, + { + "epoch": 1.725, + "grad_norm": 2.880965232849121, + "learning_rate": 1.213407084999592e-05, + "loss": 0.6115, + "step": 552 + }, + { + "epoch": 1.728125, + "grad_norm": 2.7592408657073975, + "learning_rate": 1.2086995920433495e-05, + "loss": 0.5445, + "step": 553 + }, + { + "epoch": 1.73125, + "grad_norm": 1.6351908445358276, + "learning_rate": 1.20404071581762e-05, + "loss": 0.2447, + "step": 554 + }, + { + "epoch": 1.734375, + "grad_norm": 1.5117764472961426, + "learning_rate": 1.199430631726461e-05, + "loss": 0.2566, + "step": 555 + }, + { + "epoch": 1.7375, + "grad_norm": 1.4923957586288452, + "learning_rate": 1.194869513336933e-05, + "loss": 0.208, + "step": 556 + }, + { + "epoch": 1.740625, + "grad_norm": 2.0138089656829834, + "learning_rate": 1.1903575323725649e-05, + "loss": 0.3175, + "step": 557 + }, + { + "epoch": 1.74375, + "grad_norm": 1.8065791130065918, + "learning_rate": 1.1858948587068904e-05, + "loss": 0.2546, + "step": 558 + }, + { + "epoch": 1.746875, + "grad_norm": 1.4454731941223145, + "learning_rate": 1.1814816603570499e-05, + "loss": 0.1709, + "step": 559 + }, + { + "epoch": 1.75, + "grad_norm": 2.613529682159424, + "learning_rate": 1.1771181034774677e-05, + "loss": 0.4799, + "step": 560 + }, + { + "epoch": 1.753125, + "grad_norm": 2.197608470916748, + "learning_rate": 1.1728043523535934e-05, + "loss": 0.5313, + "step": 561 + }, + { + "epoch": 1.75625, + "grad_norm": 2.056694269180298, + "learning_rate": 1.1685405693957192e-05, + "loss": 0.3248, + "step": 562 + }, + { + "epoch": 1.759375, + "grad_norm": 2.9212446212768555, + "learning_rate": 1.1643269151328634e-05, + "loss": 0.6279, + "step": 563 + }, + { + "epoch": 1.7625, + "grad_norm": 2.4438629150390625, + "learning_rate": 1.1601635482067272e-05, + "loss": 0.5193, + "step": 564 + }, + { + "epoch": 1.765625, + "grad_norm": 2.960676670074463, + "learning_rate": 1.1560506253657225e-05, + "loss": 0.6262, + "step": 565 + }, + { + "epoch": 1.76875, + "grad_norm": 2.2354516983032227, + "learning_rate": 1.1519883014590691e-05, + "loss": 0.4297, + "step": 566 + }, + { + "epoch": 1.771875, + "grad_norm": 2.175459623336792, + "learning_rate": 1.1479767294309671e-05, + "loss": 0.4763, + "step": 567 + }, + { + "epoch": 1.775, + "grad_norm": 2.5381572246551514, + "learning_rate": 1.1440160603148352e-05, + "loss": 0.5722, + "step": 568 + }, + { + "epoch": 1.778125, + "grad_norm": 2.3705122470855713, + "learning_rate": 1.140106443227627e-05, + "loss": 0.4347, + "step": 569 + }, + { + "epoch": 1.78125, + "grad_norm": 2.0581493377685547, + "learning_rate": 1.1362480253642165e-05, + "loss": 0.3271, + "step": 570 + }, + { + "epoch": 1.784375, + "grad_norm": 2.5319983959198, + "learning_rate": 1.1324409519918556e-05, + "loss": 0.5433, + "step": 571 + }, + { + "epoch": 1.7875, + "grad_norm": 0.7258579730987549, + "learning_rate": 1.128685366444704e-05, + "loss": 0.0637, + "step": 572 + }, + { + "epoch": 1.790625, + "grad_norm": 3.232028007507324, + "learning_rate": 1.1249814101184362e-05, + "loss": 0.9049, + "step": 573 + }, + { + "epoch": 1.79375, + "grad_norm": 2.510418653488159, + "learning_rate": 1.1213292224649134e-05, + "loss": 0.495, + "step": 574 + }, + { + "epoch": 1.796875, + "grad_norm": 1.644942045211792, + "learning_rate": 1.1177289409869374e-05, + "loss": 0.2218, + "step": 575 + }, + { + "epoch": 1.8, + "grad_norm": 3.1910505294799805, + "learning_rate": 1.11418070123307e-05, + "loss": 0.7491, + "step": 576 + }, + { + "epoch": 1.8, + "eval_VitaminC_cosine_accuracy": 0.5546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.8312963247299194, + "eval_VitaminC_cosine_ap": 0.5540818473167951, + "eval_VitaminC_cosine_f1": 0.6657754010695187, + "eval_VitaminC_cosine_f1_threshold": 0.3716816306114197, + "eval_VitaminC_cosine_precision": 0.5010060362173038, + "eval_VitaminC_cosine_recall": 0.9920318725099602, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 301.13458251953125, + "eval_VitaminC_dot_ap": 0.5336035822109861, + "eval_VitaminC_dot_f1": 0.6675639300134589, + "eval_VitaminC_dot_f1_threshold": 140.0170135498047, + "eval_VitaminC_dot_precision": 0.5040650406504065, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 14.30455493927002, + "eval_VitaminC_euclidean_ap": 0.5547765455338385, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 23.225872039794922, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.55859375, + "eval_VitaminC_manhattan_accuracy_threshold": 311.50494384765625, + "eval_VitaminC_manhattan_ap": 0.5520078360814107, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 491.16729736328125, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 311.50494384765625, + "eval_VitaminC_max_ap": 0.5547765455338385, + "eval_VitaminC_max_f1": 0.6675639300134589, + "eval_VitaminC_max_f1_threshold": 491.16729736328125, + "eval_VitaminC_max_precision": 0.5040650406504065, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5547765455338385, + "eval_sts-test_pearson_cosine": 0.8785522027028954, + "eval_sts-test_pearson_dot": 0.8677130233704464, + "eval_sts-test_pearson_euclidean": 0.901327101812411, + "eval_sts-test_pearson_manhattan": 0.9016459799124272, + "eval_sts-test_pearson_max": 0.9016459799124272, + "eval_sts-test_spearman_cosine": 0.9038277114411557, + "eval_sts-test_spearman_dot": 0.8689599898843539, + "eval_sts-test_spearman_euclidean": 0.8982747959226655, + "eval_sts-test_spearman_manhattan": 0.8983893144005659, + "eval_sts-test_spearman_max": 0.9038277114411557, + "eval_vitaminc-pairs_loss": 1.8366389274597168, + "eval_vitaminc-pairs_runtime": 1.8298, + "eval_vitaminc-pairs_samples_per_second": 59.021, + "eval_vitaminc-pairs_steps_per_second": 1.093, + "step": 576 + }, + { + "epoch": 1.8, + "eval_negation-triplets_loss": 0.7222614884376526, + "eval_negation-triplets_runtime": 0.292, + "eval_negation-triplets_samples_per_second": 219.186, + "eval_negation-triplets_steps_per_second": 3.425, + "step": 576 + }, + { + "epoch": 1.8, + "eval_scitail-pairs-pos_loss": 0.06263165920972824, + "eval_scitail-pairs-pos_runtime": 0.3693, + "eval_scitail-pairs-pos_samples_per_second": 146.22, + "eval_scitail-pairs-pos_steps_per_second": 2.708, + "step": 576 + }, + { + "epoch": 1.8, + "eval_xsum-pairs_loss": 0.038485851138830185, + "eval_xsum-pairs_runtime": 2.8422, + "eval_xsum-pairs_samples_per_second": 45.035, + "eval_xsum-pairs_steps_per_second": 0.704, + "step": 576 + }, + { + "epoch": 1.8, + "eval_sciq_pairs_loss": 0.017885908484458923, + "eval_sciq_pairs_runtime": 3.6267, + "eval_sciq_pairs_samples_per_second": 35.293, + "eval_sciq_pairs_steps_per_second": 0.551, + "step": 576 + }, + { + "epoch": 1.8, + "eval_qasc_pairs_loss": 0.11011218279600143, + "eval_qasc_pairs_runtime": 0.595, + "eval_qasc_pairs_samples_per_second": 215.135, + "eval_qasc_pairs_steps_per_second": 3.361, + "step": 576 + }, + { + "epoch": 1.8, + "eval_openbookqa_pairs_loss": 0.6921338438987732, + "eval_openbookqa_pairs_runtime": 0.573, + "eval_openbookqa_pairs_samples_per_second": 223.4, + "eval_openbookqa_pairs_steps_per_second": 3.491, + "step": 576 + }, + { + "epoch": 1.8, + "eval_msmarco_pairs_loss": 0.24500073492527008, + "eval_msmarco_pairs_runtime": 1.4604, + "eval_msmarco_pairs_samples_per_second": 87.65, + "eval_msmarco_pairs_steps_per_second": 1.37, + "step": 576 + }, + { + "epoch": 1.8, + "eval_nq_pairs_loss": 0.14756517112255096, + "eval_nq_pairs_runtime": 2.8567, + "eval_nq_pairs_samples_per_second": 44.806, + "eval_nq_pairs_steps_per_second": 0.7, + "step": 576 + }, + { + "epoch": 1.8, + "eval_trivia_pairs_loss": 0.6358833909034729, + "eval_trivia_pairs_runtime": 4.3759, + "eval_trivia_pairs_samples_per_second": 29.251, + "eval_trivia_pairs_steps_per_second": 0.457, + "step": 576 + }, + { + "epoch": 1.8, + "eval_gooaq_pairs_loss": 0.2909858226776123, + "eval_gooaq_pairs_runtime": 1.0026, + "eval_gooaq_pairs_samples_per_second": 127.667, + "eval_gooaq_pairs_steps_per_second": 1.995, + "step": 576 + }, + { + "epoch": 1.8, + "eval_paws-pos_loss": 0.02510605938732624, + "eval_paws-pos_runtime": 0.6858, + "eval_paws-pos_samples_per_second": 186.641, + "eval_paws-pos_steps_per_second": 2.916, + "step": 576 + } + ], + "logging_steps": 1, + "max_steps": 640, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 64, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 320, + "trial_name": null, + "trial_params": null +}