{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6, "eval_steps": 32, "global_step": 192, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003125, "grad_norm": 3.1757984161376953, "learning_rate": 3.125e-07, "loss": 0.7374, "step": 1 }, { "epoch": 0.00625, "grad_norm": 3.137390375137329, "learning_rate": 6.25e-07, "loss": 0.5723, "step": 2 }, { "epoch": 0.009375, "grad_norm": 2.765856981277466, "learning_rate": 9.375000000000001e-07, "loss": 0.551, "step": 3 }, { "epoch": 0.0125, "grad_norm": 3.468062162399292, "learning_rate": 1.25e-06, "loss": 0.7379, "step": 4 }, { "epoch": 0.015625, "grad_norm": 2.6695668697357178, "learning_rate": 1.5625e-06, "loss": 0.5271, "step": 5 }, { "epoch": 0.01875, "grad_norm": 2.7720863819122314, "learning_rate": 1.8750000000000003e-06, "loss": 0.5858, "step": 6 }, { "epoch": 0.021875, "grad_norm": 3.0211267471313477, "learning_rate": 2.1875000000000002e-06, "loss": 0.6562, "step": 7 }, { "epoch": 0.025, "grad_norm": 3.641108989715576, "learning_rate": 2.5e-06, "loss": 0.8228, "step": 8 }, { "epoch": 0.028125, "grad_norm": 3.9061200618743896, "learning_rate": 2.8125e-06, "loss": 0.9988, "step": 9 }, { "epoch": 0.03125, "grad_norm": 2.642423391342163, "learning_rate": 3.125e-06, "loss": 0.5582, "step": 10 }, { "epoch": 0.034375, "grad_norm": 3.6546943187713623, "learning_rate": 3.4375e-06, "loss": 0.8546, "step": 11 }, { "epoch": 0.0375, "grad_norm": 2.5504300594329834, "learning_rate": 3.7500000000000005e-06, "loss": 0.4235, "step": 12 }, { "epoch": 0.040625, "grad_norm": 2.845123529434204, "learning_rate": 4.0625000000000005e-06, "loss": 0.6418, "step": 13 }, { "epoch": 0.04375, "grad_norm": 2.8562164306640625, "learning_rate": 4.3750000000000005e-06, "loss": 0.6577, "step": 14 }, { "epoch": 0.046875, "grad_norm": 3.4033620357513428, "learning_rate": 4.6875000000000004e-06, "loss": 0.8333, "step": 15 }, { "epoch": 0.05, "grad_norm": 2.148242473602295, "learning_rate": 5e-06, "loss": 0.4082, "step": 16 }, { "epoch": 0.053125, "grad_norm": 3.685960292816162, "learning_rate": 5.3125e-06, "loss": 0.8101, "step": 17 }, { "epoch": 0.05625, "grad_norm": 2.7071452140808105, "learning_rate": 5.625e-06, "loss": 0.5259, "step": 18 }, { "epoch": 0.059375, "grad_norm": 3.508561611175537, "learning_rate": 5.9375e-06, "loss": 0.9015, "step": 19 }, { "epoch": 0.0625, "grad_norm": 4.140976428985596, "learning_rate": 6.25e-06, "loss": 1.3915, "step": 20 }, { "epoch": 0.065625, "grad_norm": 1.5563820600509644, "learning_rate": 6.5625e-06, "loss": 0.26, "step": 21 }, { "epoch": 0.06875, "grad_norm": 3.1467344760894775, "learning_rate": 6.875e-06, "loss": 0.6885, "step": 22 }, { "epoch": 0.071875, "grad_norm": 3.539327383041382, "learning_rate": 7.1875e-06, "loss": 0.9357, "step": 23 }, { "epoch": 0.075, "grad_norm": 3.1691510677337646, "learning_rate": 7.500000000000001e-06, "loss": 0.7168, "step": 24 }, { "epoch": 0.078125, "grad_norm": 3.9020121097564697, "learning_rate": 7.8125e-06, "loss": 0.8678, "step": 25 }, { "epoch": 0.08125, "grad_norm": 2.3635435104370117, "learning_rate": 8.125000000000001e-06, "loss": 0.4922, "step": 26 }, { "epoch": 0.084375, "grad_norm": 2.5170037746429443, "learning_rate": 8.4375e-06, "loss": 0.4937, "step": 27 }, { "epoch": 0.0875, "grad_norm": 2.7988407611846924, "learning_rate": 8.750000000000001e-06, "loss": 0.5891, "step": 28 }, { "epoch": 0.090625, "grad_norm": 2.99135160446167, "learning_rate": 9.0625e-06, "loss": 0.6921, "step": 29 }, { "epoch": 0.09375, "grad_norm": 3.098013162612915, "learning_rate": 9.375000000000001e-06, "loss": 0.8087, "step": 30 }, { "epoch": 0.096875, "grad_norm": 3.358091115951538, "learning_rate": 9.6875e-06, "loss": 0.805, "step": 31 }, { "epoch": 0.1, "grad_norm": 3.0206046104431152, "learning_rate": 1e-05, "loss": 0.6141, "step": 32 }, { "epoch": 0.1, "eval_VitaminC_cosine_accuracy": 0.5546875, "eval_VitaminC_cosine_accuracy_threshold": 0.8487042188644409, "eval_VitaminC_cosine_ap": 0.5467207830251657, "eval_VitaminC_cosine_f1": 0.6657824933687002, "eval_VitaminC_cosine_f1_threshold": 0.2510407269001007, "eval_VitaminC_cosine_precision": 0.4990059642147117, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 318.7947082519531, "eval_VitaminC_dot_ap": 0.5360598625078122, "eval_VitaminC_dot_f1": 0.6657824933687002, "eval_VitaminC_dot_f1_threshold": 98.82717895507812, "eval_VitaminC_dot_precision": 0.4990059642147117, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.552734375, "eval_VitaminC_euclidean_accuracy_threshold": 15.370981216430664, "eval_VitaminC_euclidean_ap": 0.54465834495355, "eval_VitaminC_euclidean_f1": 0.6657824933687002, "eval_VitaminC_euclidean_f1_threshold": 24.364877700805664, "eval_VitaminC_euclidean_precision": 0.4990059642147117, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5546875, "eval_VitaminC_manhattan_accuracy_threshold": 273.6689758300781, "eval_VitaminC_manhattan_ap": 0.5450408710915566, "eval_VitaminC_manhattan_f1": 0.6675531914893617, "eval_VitaminC_manhattan_f1_threshold": 502.82244873046875, "eval_VitaminC_manhattan_precision": 0.500998003992016, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.5546875, "eval_VitaminC_max_accuracy_threshold": 318.7947082519531, "eval_VitaminC_max_ap": 0.5467207830251657, "eval_VitaminC_max_f1": 0.6675531914893617, "eval_VitaminC_max_f1_threshold": 502.82244873046875, "eval_VitaminC_max_precision": 0.500998003992016, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5467207830251657, "eval_sts-test_pearson_cosine": 0.8677868917853514, "eval_sts-test_pearson_dot": 0.8601917125112223, "eval_sts-test_pearson_euclidean": 0.889472619726378, "eval_sts-test_pearson_manhattan": 0.890143281884324, "eval_sts-test_pearson_max": 0.890143281884324, "eval_sts-test_spearman_cosine": 0.8954519734959775, "eval_sts-test_spearman_dot": 0.8621348855070287, "eval_sts-test_spearman_euclidean": 0.8880001748147683, "eval_sts-test_spearman_manhattan": 0.8870461226731652, "eval_sts-test_spearman_max": 0.8954519734959775, "eval_vitaminc-pairs_loss": 2.332582473754883, "eval_vitaminc-pairs_runtime": 2.2432, "eval_vitaminc-pairs_samples_per_second": 48.146, "eval_vitaminc-pairs_steps_per_second": 0.892, "step": 32 }, { "epoch": 0.1, "eval_negation-triplets_loss": 0.8681236505508423, "eval_negation-triplets_runtime": 0.2927, "eval_negation-triplets_samples_per_second": 218.641, "eval_negation-triplets_steps_per_second": 3.416, "step": 32 }, { "epoch": 0.1, "eval_scitail-pairs-pos_loss": 0.07759770005941391, "eval_scitail-pairs-pos_runtime": 0.3708, "eval_scitail-pairs-pos_samples_per_second": 145.613, "eval_scitail-pairs-pos_steps_per_second": 2.697, "step": 32 }, { "epoch": 0.1, "eval_xsum-pairs_loss": 0.09131219983100891, "eval_xsum-pairs_runtime": 2.8486, "eval_xsum-pairs_samples_per_second": 44.934, "eval_xsum-pairs_steps_per_second": 0.702, "step": 32 }, { "epoch": 0.1, "eval_sciq_pairs_loss": 0.01965576782822609, "eval_sciq_pairs_runtime": 3.6062, "eval_sciq_pairs_samples_per_second": 35.494, "eval_sciq_pairs_steps_per_second": 0.555, "step": 32 }, { "epoch": 0.1, "eval_qasc_pairs_loss": 0.10996829718351364, "eval_qasc_pairs_runtime": 0.5975, "eval_qasc_pairs_samples_per_second": 214.235, "eval_qasc_pairs_steps_per_second": 3.347, "step": 32 }, { "epoch": 0.1, "eval_openbookqa_pairs_loss": 0.6932356953620911, "eval_openbookqa_pairs_runtime": 0.5729, "eval_openbookqa_pairs_samples_per_second": 223.415, "eval_openbookqa_pairs_steps_per_second": 3.491, "step": 32 }, { "epoch": 0.1, "eval_msmarco_pairs_loss": 0.32686129212379456, "eval_msmarco_pairs_runtime": 1.4637, "eval_msmarco_pairs_samples_per_second": 87.448, "eval_msmarco_pairs_steps_per_second": 1.366, "step": 32 }, { "epoch": 0.1, "eval_nq_pairs_loss": 0.1978442668914795, "eval_nq_pairs_runtime": 2.8588, "eval_nq_pairs_samples_per_second": 44.774, "eval_nq_pairs_steps_per_second": 0.7, "step": 32 }, { "epoch": 0.1, "eval_trivia_pairs_loss": 0.7432661652565002, "eval_trivia_pairs_runtime": 4.3895, "eval_trivia_pairs_samples_per_second": 29.16, "eval_trivia_pairs_steps_per_second": 0.456, "step": 32 }, { "epoch": 0.1, "eval_gooaq_pairs_loss": 0.3761173486709595, "eval_gooaq_pairs_runtime": 1.0043, "eval_gooaq_pairs_samples_per_second": 127.452, "eval_gooaq_pairs_steps_per_second": 1.991, "step": 32 }, { "epoch": 0.1, "eval_paws-pos_loss": 0.02476382441818714, "eval_paws-pos_runtime": 0.6858, "eval_paws-pos_samples_per_second": 186.635, "eval_paws-pos_steps_per_second": 2.916, "step": 32 }, { "epoch": 0.103125, "grad_norm": 3.51029109954834, "learning_rate": 1.0312500000000002e-05, "loss": 0.7783, "step": 33 }, { "epoch": 0.10625, "grad_norm": 3.376455783843994, "learning_rate": 1.0625e-05, "loss": 0.8746, "step": 34 }, { "epoch": 0.109375, "grad_norm": 2.7385308742523193, "learning_rate": 1.0937500000000002e-05, "loss": 0.5085, "step": 35 }, { "epoch": 0.1125, "grad_norm": 2.782606840133667, "learning_rate": 1.125e-05, "loss": 0.4842, "step": 36 }, { "epoch": 0.115625, "grad_norm": 3.4377782344818115, "learning_rate": 1.1562500000000002e-05, "loss": 0.8097, "step": 37 }, { "epoch": 0.11875, "grad_norm": 2.6202378273010254, "learning_rate": 1.1875e-05, "loss": 0.5325, "step": 38 }, { "epoch": 0.121875, "grad_norm": 3.0869128704071045, "learning_rate": 1.2187500000000001e-05, "loss": 0.7221, "step": 39 }, { "epoch": 0.125, "grad_norm": 3.131516456604004, "learning_rate": 1.25e-05, "loss": 0.708, "step": 40 }, { "epoch": 0.128125, "grad_norm": 2.0318033695220947, "learning_rate": 1.2812500000000001e-05, "loss": 0.2789, "step": 41 }, { "epoch": 0.13125, "grad_norm": 3.2574217319488525, "learning_rate": 1.3125e-05, "loss": 0.7986, "step": 42 }, { "epoch": 0.134375, "grad_norm": 3.6287729740142822, "learning_rate": 1.3437500000000001e-05, "loss": 0.9653, "step": 43 }, { "epoch": 0.1375, "grad_norm": 3.1281752586364746, "learning_rate": 1.375e-05, "loss": 0.7857, "step": 44 }, { "epoch": 0.140625, "grad_norm": 2.201566219329834, "learning_rate": 1.4062500000000001e-05, "loss": 0.2726, "step": 45 }, { "epoch": 0.14375, "grad_norm": 1.8727688789367676, "learning_rate": 1.4375e-05, "loss": 0.2458, "step": 46 }, { "epoch": 0.146875, "grad_norm": 3.156454086303711, "learning_rate": 1.4687500000000001e-05, "loss": 0.6988, "step": 47 }, { "epoch": 0.15, "grad_norm": 3.0224971771240234, "learning_rate": 1.5000000000000002e-05, "loss": 0.6328, "step": 48 }, { "epoch": 0.153125, "grad_norm": 3.4717319011688232, "learning_rate": 1.5312500000000003e-05, "loss": 0.795, "step": 49 }, { "epoch": 0.15625, "grad_norm": 2.8961374759674072, "learning_rate": 1.5625e-05, "loss": 0.6163, "step": 50 }, { "epoch": 0.159375, "grad_norm": 3.667778491973877, "learning_rate": 1.59375e-05, "loss": 0.8269, "step": 51 }, { "epoch": 0.1625, "grad_norm": 2.350587844848633, "learning_rate": 1.6250000000000002e-05, "loss": 0.52, "step": 52 }, { "epoch": 0.165625, "grad_norm": 3.312248468399048, "learning_rate": 1.6562500000000003e-05, "loss": 0.7523, "step": 53 }, { "epoch": 0.16875, "grad_norm": 2.8101534843444824, "learning_rate": 1.6875e-05, "loss": 0.6979, "step": 54 }, { "epoch": 0.171875, "grad_norm": 3.144334077835083, "learning_rate": 1.71875e-05, "loss": 0.7845, "step": 55 }, { "epoch": 0.175, "grad_norm": 3.671412229537964, "learning_rate": 1.7500000000000002e-05, "loss": 0.9325, "step": 56 }, { "epoch": 0.178125, "grad_norm": 3.204644203186035, "learning_rate": 1.7812500000000003e-05, "loss": 0.8546, "step": 57 }, { "epoch": 0.18125, "grad_norm": 2.9951093196868896, "learning_rate": 1.8125e-05, "loss": 0.6392, "step": 58 }, { "epoch": 0.184375, "grad_norm": 3.036386013031006, "learning_rate": 1.84375e-05, "loss": 0.5827, "step": 59 }, { "epoch": 0.1875, "grad_norm": 3.0899698734283447, "learning_rate": 1.8750000000000002e-05, "loss": 0.5961, "step": 60 }, { "epoch": 0.190625, "grad_norm": 2.3574728965759277, "learning_rate": 1.9062500000000003e-05, "loss": 0.3625, "step": 61 }, { "epoch": 0.19375, "grad_norm": 2.4232304096221924, "learning_rate": 1.9375e-05, "loss": 0.2584, "step": 62 }, { "epoch": 0.196875, "grad_norm": 1.9016233682632446, "learning_rate": 1.96875e-05, "loss": 0.4047, "step": 63 }, { "epoch": 0.2, "grad_norm": 3.193114995956421, "learning_rate": 2e-05, "loss": 0.9429, "step": 64 }, { "epoch": 0.2, "eval_VitaminC_cosine_accuracy": 0.560546875, "eval_VitaminC_cosine_accuracy_threshold": 0.8192525506019592, "eval_VitaminC_cosine_ap": 0.5485465805560719, "eval_VitaminC_cosine_f1": 0.6675531914893617, "eval_VitaminC_cosine_f1_threshold": 0.30620089173316956, "eval_VitaminC_cosine_precision": 0.500998003992016, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5546875, "eval_VitaminC_dot_accuracy_threshold": 308.60137939453125, "eval_VitaminC_dot_ap": 0.5375184580780159, "eval_VitaminC_dot_f1": 0.6657824933687002, "eval_VitaminC_dot_f1_threshold": 97.275634765625, "eval_VitaminC_dot_precision": 0.4990059642147117, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.552734375, "eval_VitaminC_euclidean_accuracy_threshold": 11.976862907409668, "eval_VitaminC_euclidean_ap": 0.5494925067012235, "eval_VitaminC_euclidean_f1": 0.6666666666666666, "eval_VitaminC_euclidean_f1_threshold": 23.21343994140625, "eval_VitaminC_euclidean_precision": 0.5, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.552734375, "eval_VitaminC_manhattan_accuracy_threshold": 313.34185791015625, "eval_VitaminC_manhattan_ap": 0.5475158315491966, "eval_VitaminC_manhattan_f1": 0.6666666666666666, "eval_VitaminC_manhattan_f1_threshold": 495.06231689453125, "eval_VitaminC_manhattan_precision": 0.5, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.560546875, "eval_VitaminC_max_accuracy_threshold": 313.34185791015625, "eval_VitaminC_max_ap": 0.5494925067012235, "eval_VitaminC_max_f1": 0.6675531914893617, "eval_VitaminC_max_f1_threshold": 495.06231689453125, "eval_VitaminC_max_precision": 0.500998003992016, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5494925067012235, "eval_sts-test_pearson_cosine": 0.8681028367252808, "eval_sts-test_pearson_dot": 0.8578643818026934, "eval_sts-test_pearson_euclidean": 0.8913506886125709, "eval_sts-test_pearson_manhattan": 0.8922209656727235, "eval_sts-test_pearson_max": 0.8922209656727235, "eval_sts-test_spearman_cosine": 0.8960442588011338, "eval_sts-test_spearman_dot": 0.8606696844578128, "eval_sts-test_spearman_euclidean": 0.8895474944286376, "eval_sts-test_spearman_manhattan": 0.8895341585527426, "eval_sts-test_spearman_max": 0.8960442588011338, "eval_vitaminc-pairs_loss": 2.260099411010742, "eval_vitaminc-pairs_runtime": 1.8392, "eval_vitaminc-pairs_samples_per_second": 58.723, "eval_vitaminc-pairs_steps_per_second": 1.087, "step": 64 }, { "epoch": 0.2, "eval_negation-triplets_loss": 0.836820662021637, "eval_negation-triplets_runtime": 0.294, "eval_negation-triplets_samples_per_second": 217.7, "eval_negation-triplets_steps_per_second": 3.402, "step": 64 }, { "epoch": 0.2, "eval_scitail-pairs-pos_loss": 0.08362159878015518, "eval_scitail-pairs-pos_runtime": 0.3686, "eval_scitail-pairs-pos_samples_per_second": 146.509, "eval_scitail-pairs-pos_steps_per_second": 2.713, "step": 64 }, { "epoch": 0.2, "eval_xsum-pairs_loss": 0.08567425608634949, "eval_xsum-pairs_runtime": 2.8489, "eval_xsum-pairs_samples_per_second": 44.93, "eval_xsum-pairs_steps_per_second": 0.702, "step": 64 }, { "epoch": 0.2, "eval_sciq_pairs_loss": 0.019713517278432846, "eval_sciq_pairs_runtime": 3.616, "eval_sciq_pairs_samples_per_second": 35.399, "eval_sciq_pairs_steps_per_second": 0.553, "step": 64 }, { "epoch": 0.2, "eval_qasc_pairs_loss": 0.11403815448284149, "eval_qasc_pairs_runtime": 0.6024, "eval_qasc_pairs_samples_per_second": 212.48, "eval_qasc_pairs_steps_per_second": 3.32, "step": 64 }, { "epoch": 0.2, "eval_openbookqa_pairs_loss": 0.6793034076690674, "eval_openbookqa_pairs_runtime": 0.5864, "eval_openbookqa_pairs_samples_per_second": 218.266, "eval_openbookqa_pairs_steps_per_second": 3.41, "step": 64 }, { "epoch": 0.2, "eval_msmarco_pairs_loss": 0.34600257873535156, "eval_msmarco_pairs_runtime": 1.4668, "eval_msmarco_pairs_samples_per_second": 87.263, "eval_msmarco_pairs_steps_per_second": 1.363, "step": 64 }, { "epoch": 0.2, "eval_nq_pairs_loss": 0.22141708433628082, "eval_nq_pairs_runtime": 2.8596, "eval_nq_pairs_samples_per_second": 44.761, "eval_nq_pairs_steps_per_second": 0.699, "step": 64 }, { "epoch": 0.2, "eval_trivia_pairs_loss": 0.7303681969642639, "eval_trivia_pairs_runtime": 4.3864, "eval_trivia_pairs_samples_per_second": 29.181, "eval_trivia_pairs_steps_per_second": 0.456, "step": 64 }, { "epoch": 0.2, "eval_gooaq_pairs_loss": 0.38013964891433716, "eval_gooaq_pairs_runtime": 1.0052, "eval_gooaq_pairs_samples_per_second": 127.34, "eval_gooaq_pairs_steps_per_second": 1.99, "step": 64 }, { "epoch": 0.2, "eval_paws-pos_loss": 0.024541139602661133, "eval_paws-pos_runtime": 0.6851, "eval_paws-pos_samples_per_second": 186.844, "eval_paws-pos_steps_per_second": 2.919, "step": 64 }, { "epoch": 0.203125, "grad_norm": 3.5084540843963623, "learning_rate": 2.0312500000000002e-05, "loss": 0.7848, "step": 65 }, { "epoch": 0.20625, "grad_norm": 3.749316453933716, "learning_rate": 2.0625000000000003e-05, "loss": 0.7589, "step": 66 }, { "epoch": 0.209375, "grad_norm": 3.4131276607513428, "learning_rate": 2.09375e-05, "loss": 0.5905, "step": 67 }, { "epoch": 0.2125, "grad_norm": 2.4543726444244385, "learning_rate": 2.125e-05, "loss": 0.4211, "step": 68 }, { "epoch": 0.215625, "grad_norm": 2.6270904541015625, "learning_rate": 2.1562500000000002e-05, "loss": 0.5325, "step": 69 }, { "epoch": 0.21875, "grad_norm": 2.2518444061279297, "learning_rate": 2.1875000000000003e-05, "loss": 0.3541, "step": 70 }, { "epoch": 0.221875, "grad_norm": 3.88729190826416, "learning_rate": 2.21875e-05, "loss": 0.9396, "step": 71 }, { "epoch": 0.225, "grad_norm": 3.2759203910827637, "learning_rate": 2.25e-05, "loss": 0.6997, "step": 72 }, { "epoch": 0.228125, "grad_norm": 3.149787425994873, "learning_rate": 2.2812500000000002e-05, "loss": 0.6415, "step": 73 }, { "epoch": 0.23125, "grad_norm": 4.01395845413208, "learning_rate": 2.3125000000000003e-05, "loss": 1.1966, "step": 74 }, { "epoch": 0.234375, "grad_norm": 3.0432724952697754, "learning_rate": 2.34375e-05, "loss": 0.7142, "step": 75 }, { "epoch": 0.2375, "grad_norm": 2.960078716278076, "learning_rate": 2.375e-05, "loss": 0.6048, "step": 76 }, { "epoch": 0.240625, "grad_norm": 2.414846658706665, "learning_rate": 2.4062500000000002e-05, "loss": 0.4639, "step": 77 }, { "epoch": 0.24375, "grad_norm": 4.241907119750977, "learning_rate": 2.4375000000000003e-05, "loss": 0.9391, "step": 78 }, { "epoch": 0.246875, "grad_norm": 3.350724220275879, "learning_rate": 2.46875e-05, "loss": 0.6364, "step": 79 }, { "epoch": 0.25, "grad_norm": 2.519324541091919, "learning_rate": 2.5e-05, "loss": 0.515, "step": 80 }, { "epoch": 0.253125, "grad_norm": 3.655949592590332, "learning_rate": 2.5312500000000002e-05, "loss": 0.6505, "step": 81 }, { "epoch": 0.25625, "grad_norm": 3.1521031856536865, "learning_rate": 2.5625000000000003e-05, "loss": 0.6149, "step": 82 }, { "epoch": 0.259375, "grad_norm": 2.637176036834717, "learning_rate": 2.5937500000000004e-05, "loss": 0.4471, "step": 83 }, { "epoch": 0.2625, "grad_norm": 4.223080158233643, "learning_rate": 2.625e-05, "loss": 1.4199, "step": 84 }, { "epoch": 0.265625, "grad_norm": 3.141789436340332, "learning_rate": 2.6562500000000002e-05, "loss": 0.8484, "step": 85 }, { "epoch": 0.26875, "grad_norm": 3.2342255115509033, "learning_rate": 2.6875000000000003e-05, "loss": 0.6412, "step": 86 }, { "epoch": 0.271875, "grad_norm": 3.445375442504883, "learning_rate": 2.7187500000000004e-05, "loss": 0.65, "step": 87 }, { "epoch": 0.275, "grad_norm": 3.395848035812378, "learning_rate": 2.75e-05, "loss": 0.7453, "step": 88 }, { "epoch": 0.278125, "grad_norm": 3.752084493637085, "learning_rate": 2.7812500000000002e-05, "loss": 0.9506, "step": 89 }, { "epoch": 0.28125, "grad_norm": 3.2424893379211426, "learning_rate": 2.8125000000000003e-05, "loss": 0.6083, "step": 90 }, { "epoch": 0.284375, "grad_norm": 2.8851892948150635, "learning_rate": 2.8437500000000003e-05, "loss": 0.7102, "step": 91 }, { "epoch": 0.2875, "grad_norm": 2.385157823562622, "learning_rate": 2.875e-05, "loss": 0.4037, "step": 92 }, { "epoch": 0.290625, "grad_norm": 3.5539441108703613, "learning_rate": 2.90625e-05, "loss": 0.769, "step": 93 }, { "epoch": 0.29375, "grad_norm": 3.686418056488037, "learning_rate": 2.9375000000000003e-05, "loss": 0.8765, "step": 94 }, { "epoch": 0.296875, "grad_norm": 3.9195055961608887, "learning_rate": 2.9687500000000003e-05, "loss": 1.2583, "step": 95 }, { "epoch": 0.3, "grad_norm": 3.5373759269714355, "learning_rate": 3.0000000000000004e-05, "loss": 0.8885, "step": 96 }, { "epoch": 0.3, "eval_VitaminC_cosine_accuracy": 0.55859375, "eval_VitaminC_cosine_accuracy_threshold": 0.8407348990440369, "eval_VitaminC_cosine_ap": 0.5524635737287826, "eval_VitaminC_cosine_f1": 0.6666666666666666, "eval_VitaminC_cosine_f1_threshold": 0.2901695668697357, "eval_VitaminC_cosine_precision": 0.5, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5546875, "eval_VitaminC_dot_accuracy_threshold": 331.7409973144531, "eval_VitaminC_dot_ap": 0.5393192469559877, "eval_VitaminC_dot_f1": 0.6657824933687002, "eval_VitaminC_dot_f1_threshold": 104.93923950195312, "eval_VitaminC_dot_precision": 0.4990059642147117, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.5546875, "eval_VitaminC_euclidean_accuracy_threshold": 10.979323387145996, "eval_VitaminC_euclidean_ap": 0.5510789245842218, "eval_VitaminC_euclidean_f1": 0.6666666666666666, "eval_VitaminC_euclidean_f1_threshold": 23.105466842651367, "eval_VitaminC_euclidean_precision": 0.5, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.55078125, "eval_VitaminC_manhattan_accuracy_threshold": 228.8612060546875, "eval_VitaminC_manhattan_ap": 0.550140326019901, "eval_VitaminC_manhattan_f1": 0.6666666666666667, "eval_VitaminC_manhattan_f1_threshold": 479.256103515625, "eval_VitaminC_manhattan_precision": 0.501002004008016, "eval_VitaminC_manhattan_recall": 0.9960159362549801, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 331.7409973144531, "eval_VitaminC_max_ap": 0.5524635737287826, "eval_VitaminC_max_f1": 0.6666666666666667, "eval_VitaminC_max_f1_threshold": 479.256103515625, "eval_VitaminC_max_precision": 0.501002004008016, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5524635737287826, "eval_sts-test_pearson_cosine": 0.8707252459918289, "eval_sts-test_pearson_dot": 0.8616721319399807, "eval_sts-test_pearson_euclidean": 0.8926205493906139, "eval_sts-test_pearson_manhattan": 0.8931067612799872, "eval_sts-test_pearson_max": 0.8931067612799872, "eval_sts-test_spearman_cosine": 0.8969095691913977, "eval_sts-test_spearman_dot": 0.8614390033923923, "eval_sts-test_spearman_euclidean": 0.8906887410966409, "eval_sts-test_spearman_manhattan": 0.8902939007173846, "eval_sts-test_spearman_max": 0.8969095691913977, "eval_vitaminc-pairs_loss": 2.259434938430786, "eval_vitaminc-pairs_runtime": 1.8587, "eval_vitaminc-pairs_samples_per_second": 58.104, "eval_vitaminc-pairs_steps_per_second": 1.076, "step": 96 }, { "epoch": 0.3, "eval_negation-triplets_loss": 0.8346852660179138, "eval_negation-triplets_runtime": 0.2932, "eval_negation-triplets_samples_per_second": 218.315, "eval_negation-triplets_steps_per_second": 3.411, "step": 96 }, { "epoch": 0.3, "eval_scitail-pairs-pos_loss": 0.07568605989217758, "eval_scitail-pairs-pos_runtime": 0.3763, "eval_scitail-pairs-pos_samples_per_second": 143.494, "eval_scitail-pairs-pos_steps_per_second": 2.657, "step": 96 }, { "epoch": 0.3, "eval_xsum-pairs_loss": 0.08208194375038147, "eval_xsum-pairs_runtime": 2.8486, "eval_xsum-pairs_samples_per_second": 44.934, "eval_xsum-pairs_steps_per_second": 0.702, "step": 96 }, { "epoch": 0.3, "eval_sciq_pairs_loss": 0.020024314522743225, "eval_sciq_pairs_runtime": 3.6173, "eval_sciq_pairs_samples_per_second": 35.386, "eval_sciq_pairs_steps_per_second": 0.553, "step": 96 }, { "epoch": 0.3, "eval_qasc_pairs_loss": 0.10592304170131683, "eval_qasc_pairs_runtime": 0.5997, "eval_qasc_pairs_samples_per_second": 213.431, "eval_qasc_pairs_steps_per_second": 3.335, "step": 96 }, { "epoch": 0.3, "eval_openbookqa_pairs_loss": 0.6809090971946716, "eval_openbookqa_pairs_runtime": 0.5752, "eval_openbookqa_pairs_samples_per_second": 222.54, "eval_openbookqa_pairs_steps_per_second": 3.477, "step": 96 }, { "epoch": 0.3, "eval_msmarco_pairs_loss": 0.3400232195854187, "eval_msmarco_pairs_runtime": 1.4679, "eval_msmarco_pairs_samples_per_second": 87.202, "eval_msmarco_pairs_steps_per_second": 1.363, "step": 96 }, { "epoch": 0.3, "eval_nq_pairs_loss": 0.2074178159236908, "eval_nq_pairs_runtime": 2.8593, "eval_nq_pairs_samples_per_second": 44.766, "eval_nq_pairs_steps_per_second": 0.699, "step": 96 }, { "epoch": 0.3, "eval_trivia_pairs_loss": 0.7431399822235107, "eval_trivia_pairs_runtime": 4.4162, "eval_trivia_pairs_samples_per_second": 28.984, "eval_trivia_pairs_steps_per_second": 0.453, "step": 96 }, { "epoch": 0.3, "eval_gooaq_pairs_loss": 0.3708875775337219, "eval_gooaq_pairs_runtime": 1.0094, "eval_gooaq_pairs_samples_per_second": 126.81, "eval_gooaq_pairs_steps_per_second": 1.981, "step": 96 }, { "epoch": 0.3, "eval_paws-pos_loss": 0.024763749912381172, "eval_paws-pos_runtime": 0.6874, "eval_paws-pos_samples_per_second": 186.212, "eval_paws-pos_steps_per_second": 2.91, "step": 96 }, { "epoch": 0.303125, "grad_norm": 3.2354822158813477, "learning_rate": 3.03125e-05, "loss": 0.6398, "step": 97 }, { "epoch": 0.30625, "grad_norm": 3.6665022373199463, "learning_rate": 3.0625000000000006e-05, "loss": 0.8263, "step": 98 }, { "epoch": 0.309375, "grad_norm": 3.026954412460327, "learning_rate": 3.09375e-05, "loss": 0.8716, "step": 99 }, { "epoch": 0.3125, "grad_norm": 2.445453643798828, "learning_rate": 3.125e-05, "loss": 0.5523, "step": 100 }, { "epoch": 0.315625, "grad_norm": 3.4408035278320312, "learning_rate": 3.15625e-05, "loss": 0.5811, "step": 101 }, { "epoch": 0.31875, "grad_norm": 2.8406240940093994, "learning_rate": 3.1875e-05, "loss": 0.7602, "step": 102 }, { "epoch": 0.321875, "grad_norm": 2.5201492309570312, "learning_rate": 3.21875e-05, "loss": 0.5337, "step": 103 }, { "epoch": 0.325, "grad_norm": 3.323239326477051, "learning_rate": 3.2500000000000004e-05, "loss": 0.8182, "step": 104 }, { "epoch": 0.328125, "grad_norm": 3.2463977336883545, "learning_rate": 3.2812500000000005e-05, "loss": 0.6641, "step": 105 }, { "epoch": 0.33125, "grad_norm": 3.4495010375976562, "learning_rate": 3.3125000000000006e-05, "loss": 1.0088, "step": 106 }, { "epoch": 0.334375, "grad_norm": 2.7572243213653564, "learning_rate": 3.34375e-05, "loss": 0.7556, "step": 107 }, { "epoch": 0.3375, "grad_norm": 3.494549512863159, "learning_rate": 3.375e-05, "loss": 0.713, "step": 108 }, { "epoch": 0.340625, "grad_norm": 3.4666013717651367, "learning_rate": 3.40625e-05, "loss": 0.8385, "step": 109 }, { "epoch": 0.34375, "grad_norm": 3.05104660987854, "learning_rate": 3.4375e-05, "loss": 0.5181, "step": 110 }, { "epoch": 0.346875, "grad_norm": 3.8259003162384033, "learning_rate": 3.46875e-05, "loss": 1.0939, "step": 111 }, { "epoch": 0.35, "grad_norm": 3.287792205810547, "learning_rate": 3.5000000000000004e-05, "loss": 0.5826, "step": 112 }, { "epoch": 0.353125, "grad_norm": 3.9174458980560303, "learning_rate": 3.5312500000000005e-05, "loss": 0.7121, "step": 113 }, { "epoch": 0.35625, "grad_norm": 3.424893379211426, "learning_rate": 3.5625000000000005e-05, "loss": 0.9371, "step": 114 }, { "epoch": 0.359375, "grad_norm": 3.5157482624053955, "learning_rate": 3.5937500000000006e-05, "loss": 0.7739, "step": 115 }, { "epoch": 0.3625, "grad_norm": 4.468640327453613, "learning_rate": 3.625e-05, "loss": 0.9612, "step": 116 }, { "epoch": 0.365625, "grad_norm": 3.4379608631134033, "learning_rate": 3.65625e-05, "loss": 0.7213, "step": 117 }, { "epoch": 0.36875, "grad_norm": 2.9453623294830322, "learning_rate": 3.6875e-05, "loss": 0.621, "step": 118 }, { "epoch": 0.371875, "grad_norm": 2.4365315437316895, "learning_rate": 3.71875e-05, "loss": 0.5503, "step": 119 }, { "epoch": 0.375, "grad_norm": 3.446967124938965, "learning_rate": 3.7500000000000003e-05, "loss": 0.8439, "step": 120 }, { "epoch": 0.378125, "grad_norm": 3.8797788619995117, "learning_rate": 3.7812500000000004e-05, "loss": 0.7813, "step": 121 }, { "epoch": 0.38125, "grad_norm": 3.0103230476379395, "learning_rate": 3.8125000000000005e-05, "loss": 0.5637, "step": 122 }, { "epoch": 0.384375, "grad_norm": 3.9547793865203857, "learning_rate": 3.8437500000000006e-05, "loss": 0.9052, "step": 123 }, { "epoch": 0.3875, "grad_norm": 2.953261375427246, "learning_rate": 3.875e-05, "loss": 0.64, "step": 124 }, { "epoch": 0.390625, "grad_norm": 2.914365768432617, "learning_rate": 3.90625e-05, "loss": 0.6529, "step": 125 }, { "epoch": 0.39375, "grad_norm": 3.346844434738159, "learning_rate": 3.9375e-05, "loss": 0.6894, "step": 126 }, { "epoch": 0.396875, "grad_norm": 3.946427583694458, "learning_rate": 3.96875e-05, "loss": 0.8604, "step": 127 }, { "epoch": 0.4, "grad_norm": 3.3265583515167236, "learning_rate": 4e-05, "loss": 0.8503, "step": 128 }, { "epoch": 0.4, "eval_VitaminC_cosine_accuracy": 0.55859375, "eval_VitaminC_cosine_accuracy_threshold": 0.835027813911438, "eval_VitaminC_cosine_ap": 0.5482054260732142, "eval_VitaminC_cosine_f1": 0.6666666666666666, "eval_VitaminC_cosine_f1_threshold": 0.28428012132644653, "eval_VitaminC_cosine_precision": 0.5, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.548828125, "eval_VitaminC_dot_accuracy_threshold": 321.1236572265625, "eval_VitaminC_dot_ap": 0.5350248143918641, "eval_VitaminC_dot_f1": 0.6649006622516557, "eval_VitaminC_dot_f1_threshold": 94.1016616821289, "eval_VitaminC_dot_precision": 0.498015873015873, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.55859375, "eval_VitaminC_euclidean_accuracy_threshold": 13.260427474975586, "eval_VitaminC_euclidean_ap": 0.551773706587656, "eval_VitaminC_euclidean_f1": 0.6657824933687002, "eval_VitaminC_euclidean_f1_threshold": 23.911056518554688, "eval_VitaminC_euclidean_precision": 0.4990059642147117, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.55859375, "eval_VitaminC_manhattan_accuracy_threshold": 273.4624328613281, "eval_VitaminC_manhattan_ap": 0.5494410762635437, "eval_VitaminC_manhattan_f1": 0.6666666666666667, "eval_VitaminC_manhattan_f1_threshold": 472.7373046875, "eval_VitaminC_manhattan_precision": 0.5020161290322581, "eval_VitaminC_manhattan_recall": 0.9920318725099602, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 321.1236572265625, "eval_VitaminC_max_ap": 0.551773706587656, "eval_VitaminC_max_f1": 0.6666666666666667, "eval_VitaminC_max_f1_threshold": 472.7373046875, "eval_VitaminC_max_precision": 0.5020161290322581, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.551773706587656, "eval_sts-test_pearson_cosine": 0.8672675483925697, "eval_sts-test_pearson_dot": 0.8586110849200466, "eval_sts-test_pearson_euclidean": 0.8915515585715386, "eval_sts-test_pearson_manhattan": 0.8913674606593633, "eval_sts-test_pearson_max": 0.8915515585715386, "eval_sts-test_spearman_cosine": 0.8969123885208655, "eval_sts-test_spearman_dot": 0.8619306407500383, "eval_sts-test_spearman_euclidean": 0.8903670690297594, "eval_sts-test_spearman_manhattan": 0.890351227083227, "eval_sts-test_spearman_max": 0.8969123885208655, "eval_vitaminc-pairs_loss": 2.0338199138641357, "eval_vitaminc-pairs_runtime": 1.8309, "eval_vitaminc-pairs_samples_per_second": 58.988, "eval_vitaminc-pairs_steps_per_second": 1.092, "step": 128 }, { "epoch": 0.4, "eval_negation-triplets_loss": 0.7916581630706787, "eval_negation-triplets_runtime": 0.2912, "eval_negation-triplets_samples_per_second": 219.766, "eval_negation-triplets_steps_per_second": 3.434, "step": 128 }, { "epoch": 0.4, "eval_scitail-pairs-pos_loss": 0.07755717635154724, "eval_scitail-pairs-pos_runtime": 0.3716, "eval_scitail-pairs-pos_samples_per_second": 145.312, "eval_scitail-pairs-pos_steps_per_second": 2.691, "step": 128 }, { "epoch": 0.4, "eval_xsum-pairs_loss": 0.08196285367012024, "eval_xsum-pairs_runtime": 2.852, "eval_xsum-pairs_samples_per_second": 44.881, "eval_xsum-pairs_steps_per_second": 0.701, "step": 128 }, { "epoch": 0.4, "eval_sciq_pairs_loss": 0.020960956811904907, "eval_sciq_pairs_runtime": 3.5913, "eval_sciq_pairs_samples_per_second": 35.642, "eval_sciq_pairs_steps_per_second": 0.557, "step": 128 }, { "epoch": 0.4, "eval_qasc_pairs_loss": 0.11308694630861282, "eval_qasc_pairs_runtime": 0.595, "eval_qasc_pairs_samples_per_second": 215.137, "eval_qasc_pairs_steps_per_second": 3.362, "step": 128 }, { "epoch": 0.4, "eval_openbookqa_pairs_loss": 0.7888042330741882, "eval_openbookqa_pairs_runtime": 0.5711, "eval_openbookqa_pairs_samples_per_second": 224.114, "eval_openbookqa_pairs_steps_per_second": 3.502, "step": 128 }, { "epoch": 0.4, "eval_msmarco_pairs_loss": 0.3428971469402313, "eval_msmarco_pairs_runtime": 1.465, "eval_msmarco_pairs_samples_per_second": 87.373, "eval_msmarco_pairs_steps_per_second": 1.365, "step": 128 }, { "epoch": 0.4, "eval_nq_pairs_loss": 0.20846250653266907, "eval_nq_pairs_runtime": 2.8581, "eval_nq_pairs_samples_per_second": 44.786, "eval_nq_pairs_steps_per_second": 0.7, "step": 128 }, { "epoch": 0.4, "eval_trivia_pairs_loss": 0.7110738754272461, "eval_trivia_pairs_runtime": 4.3917, "eval_trivia_pairs_samples_per_second": 29.146, "eval_trivia_pairs_steps_per_second": 0.455, "step": 128 }, { "epoch": 0.4, "eval_gooaq_pairs_loss": 0.3744402229785919, "eval_gooaq_pairs_runtime": 1.0043, "eval_gooaq_pairs_samples_per_second": 127.448, "eval_gooaq_pairs_steps_per_second": 1.991, "step": 128 }, { "epoch": 0.4, "eval_paws-pos_loss": 0.024828137829899788, "eval_paws-pos_runtime": 0.6859, "eval_paws-pos_samples_per_second": 186.611, "eval_paws-pos_steps_per_second": 2.916, "step": 128 }, { "epoch": 0.403125, "grad_norm": 3.7963619232177734, "learning_rate": 3.999971762923902e-05, "loss": 0.8171, "step": 129 }, { "epoch": 0.40625, "grad_norm": 3.987645387649536, "learning_rate": 3.999887052758717e-05, "loss": 1.0401, "step": 130 }, { "epoch": 0.409375, "grad_norm": 2.653578758239746, "learning_rate": 3.999745872693735e-05, "loss": 0.4243, "step": 131 }, { "epoch": 0.4125, "grad_norm": 2.3737175464630127, "learning_rate": 3.9995482280443065e-05, "loss": 0.3778, "step": 132 }, { "epoch": 0.415625, "grad_norm": 3.334118127822876, "learning_rate": 3.99929412625164e-05, "loss": 0.7651, "step": 133 }, { "epoch": 0.41875, "grad_norm": 3.5098752975463867, "learning_rate": 3.998983576882524e-05, "loss": 0.6003, "step": 134 }, { "epoch": 0.421875, "grad_norm": 3.023698091506958, "learning_rate": 3.9986165916289686e-05, "loss": 0.6023, "step": 135 }, { "epoch": 0.425, "grad_norm": 3.293668746948242, "learning_rate": 3.998193184307759e-05, "loss": 0.6079, "step": 136 }, { "epoch": 0.428125, "grad_norm": 3.326125144958496, "learning_rate": 3.997713370859942e-05, "loss": 0.6206, "step": 137 }, { "epoch": 0.43125, "grad_norm": 3.322040557861328, "learning_rate": 3.997177169350224e-05, "loss": 0.4694, "step": 138 }, { "epoch": 0.434375, "grad_norm": 3.1219382286071777, "learning_rate": 3.996584599966288e-05, "loss": 0.7528, "step": 139 }, { "epoch": 0.4375, "grad_norm": 3.7076480388641357, "learning_rate": 3.9959356850180354e-05, "loss": 0.8395, "step": 140 }, { "epoch": 0.440625, "grad_norm": 3.1098551750183105, "learning_rate": 3.995230448936749e-05, "loss": 0.6689, "step": 141 }, { "epoch": 0.44375, "grad_norm": 3.31339168548584, "learning_rate": 3.9944689182741674e-05, "loss": 0.6547, "step": 142 }, { "epoch": 0.446875, "grad_norm": 4.2841386795043945, "learning_rate": 3.99365112170149e-05, "loss": 0.9242, "step": 143 }, { "epoch": 0.45, "grad_norm": 4.0628132820129395, "learning_rate": 3.992777090008296e-05, "loss": 0.9496, "step": 144 }, { "epoch": 0.453125, "grad_norm": 3.484614849090576, "learning_rate": 3.9918468561013834e-05, "loss": 0.6506, "step": 145 }, { "epoch": 0.45625, "grad_norm": 3.4139559268951416, "learning_rate": 3.990860455003534e-05, "loss": 0.786, "step": 146 }, { "epoch": 0.459375, "grad_norm": 3.4322853088378906, "learning_rate": 3.9898179238521916e-05, "loss": 0.7414, "step": 147 }, { "epoch": 0.4625, "grad_norm": 2.660554885864258, "learning_rate": 3.9887193018980654e-05, "loss": 0.3978, "step": 148 }, { "epoch": 0.465625, "grad_norm": 2.6429054737091064, "learning_rate": 3.9875646305036494e-05, "loss": 0.5635, "step": 149 }, { "epoch": 0.46875, "grad_norm": 4.292131423950195, "learning_rate": 3.98635395314167e-05, "loss": 0.9466, "step": 150 }, { "epoch": 0.471875, "grad_norm": 3.1115028858184814, "learning_rate": 3.9850873153934456e-05, "loss": 0.5251, "step": 151 }, { "epoch": 0.475, "grad_norm": 3.307051181793213, "learning_rate": 3.983764764947172e-05, "loss": 0.6636, "step": 152 }, { "epoch": 0.478125, "grad_norm": 3.807854652404785, "learning_rate": 3.9823863515961245e-05, "loss": 0.7834, "step": 153 }, { "epoch": 0.48125, "grad_norm": 2.9957728385925293, "learning_rate": 3.980952127236788e-05, "loss": 0.6177, "step": 154 }, { "epoch": 0.484375, "grad_norm": 3.3072471618652344, "learning_rate": 3.979462145866898e-05, "loss": 0.4558, "step": 155 }, { "epoch": 0.4875, "grad_norm": 3.0199949741363525, "learning_rate": 3.977916463583412e-05, "loss": 0.5228, "step": 156 }, { "epoch": 0.490625, "grad_norm": 2.8596651554107666, "learning_rate": 3.9763151385803936e-05, "loss": 0.5543, "step": 157 }, { "epoch": 0.49375, "grad_norm": 3.0589263439178467, "learning_rate": 3.974658231146825e-05, "loss": 0.7127, "step": 158 }, { "epoch": 0.496875, "grad_norm": 2.489602565765381, "learning_rate": 3.9729458036643335e-05, "loss": 0.4227, "step": 159 }, { "epoch": 0.5, "grad_norm": 3.3471999168395996, "learning_rate": 3.971177920604846e-05, "loss": 0.5914, "step": 160 }, { "epoch": 0.5, "eval_VitaminC_cosine_accuracy": 0.55859375, "eval_VitaminC_cosine_accuracy_threshold": 0.8433390855789185, "eval_VitaminC_cosine_ap": 0.5529005025024077, "eval_VitaminC_cosine_f1": 0.6657824933687002, "eval_VitaminC_cosine_f1_threshold": 0.3040446639060974, "eval_VitaminC_cosine_precision": 0.4990059642147117, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.55859375, "eval_VitaminC_dot_accuracy_threshold": 309.7912902832031, "eval_VitaminC_dot_ap": 0.5373200658982779, "eval_VitaminC_dot_f1": 0.6666666666666666, "eval_VitaminC_dot_f1_threshold": 122.78400421142578, "eval_VitaminC_dot_precision": 0.5, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.5546875, "eval_VitaminC_euclidean_accuracy_threshold": 11.011507034301758, "eval_VitaminC_euclidean_ap": 0.5542686405562732, "eval_VitaminC_euclidean_f1": 0.6675531914893617, "eval_VitaminC_euclidean_f1_threshold": 22.90133285522461, "eval_VitaminC_euclidean_precision": 0.500998003992016, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.55859375, "eval_VitaminC_manhattan_accuracy_threshold": 293.54693603515625, "eval_VitaminC_manhattan_ap": 0.5529507613553954, "eval_VitaminC_manhattan_f1": 0.6666666666666667, "eval_VitaminC_manhattan_f1_threshold": 479.09588623046875, "eval_VitaminC_manhattan_precision": 0.501002004008016, "eval_VitaminC_manhattan_recall": 0.9960159362549801, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 309.7912902832031, "eval_VitaminC_max_ap": 0.5542686405562732, "eval_VitaminC_max_f1": 0.6675531914893617, "eval_VitaminC_max_f1_threshold": 479.09588623046875, "eval_VitaminC_max_precision": 0.501002004008016, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5542686405562732, "eval_sts-test_pearson_cosine": 0.8717931331186477, "eval_sts-test_pearson_dot": 0.8628985772297639, "eval_sts-test_pearson_euclidean": 0.8935960577585327, "eval_sts-test_pearson_manhattan": 0.8926162242871916, "eval_sts-test_pearson_max": 0.8935960577585327, "eval_sts-test_spearman_cosine": 0.8989036406477372, "eval_sts-test_spearman_dot": 0.8620115510306339, "eval_sts-test_spearman_euclidean": 0.8911198747488857, "eval_sts-test_spearman_manhattan": 0.8899440801070879, "eval_sts-test_spearman_max": 0.8989036406477372, "eval_vitaminc-pairs_loss": 2.0564281940460205, "eval_vitaminc-pairs_runtime": 1.8511, "eval_vitaminc-pairs_samples_per_second": 58.343, "eval_vitaminc-pairs_steps_per_second": 1.08, "step": 160 }, { "epoch": 0.5, "eval_negation-triplets_loss": 0.7865684032440186, "eval_negation-triplets_runtime": 0.2987, "eval_negation-triplets_samples_per_second": 214.291, "eval_negation-triplets_steps_per_second": 3.348, "step": 160 }, { "epoch": 0.5, "eval_scitail-pairs-pos_loss": 0.09969007223844528, "eval_scitail-pairs-pos_runtime": 0.384, "eval_scitail-pairs-pos_samples_per_second": 140.615, "eval_scitail-pairs-pos_steps_per_second": 2.604, "step": 160 }, { "epoch": 0.5, "eval_xsum-pairs_loss": 0.08461853861808777, "eval_xsum-pairs_runtime": 2.8533, "eval_xsum-pairs_samples_per_second": 44.86, "eval_xsum-pairs_steps_per_second": 0.701, "step": 160 }, { "epoch": 0.5, "eval_sciq_pairs_loss": 0.020078735426068306, "eval_sciq_pairs_runtime": 3.6458, "eval_sciq_pairs_samples_per_second": 35.109, "eval_sciq_pairs_steps_per_second": 0.549, "step": 160 }, { "epoch": 0.5, "eval_qasc_pairs_loss": 0.12362705171108246, "eval_qasc_pairs_runtime": 0.6028, "eval_qasc_pairs_samples_per_second": 212.356, "eval_qasc_pairs_steps_per_second": 3.318, "step": 160 }, { "epoch": 0.5, "eval_openbookqa_pairs_loss": 0.6668081283569336, "eval_openbookqa_pairs_runtime": 0.5784, "eval_openbookqa_pairs_samples_per_second": 221.308, "eval_openbookqa_pairs_steps_per_second": 3.458, "step": 160 }, { "epoch": 0.5, "eval_msmarco_pairs_loss": 0.32913729548454285, "eval_msmarco_pairs_runtime": 1.4669, "eval_msmarco_pairs_samples_per_second": 87.26, "eval_msmarco_pairs_steps_per_second": 1.363, "step": 160 }, { "epoch": 0.5, "eval_nq_pairs_loss": 0.2085198312997818, "eval_nq_pairs_runtime": 2.8644, "eval_nq_pairs_samples_per_second": 44.687, "eval_nq_pairs_steps_per_second": 0.698, "step": 160 }, { "epoch": 0.5, "eval_trivia_pairs_loss": 0.7138605117797852, "eval_trivia_pairs_runtime": 4.3915, "eval_trivia_pairs_samples_per_second": 29.147, "eval_trivia_pairs_steps_per_second": 0.455, "step": 160 }, { "epoch": 0.5, "eval_gooaq_pairs_loss": 0.3919322192668915, "eval_gooaq_pairs_runtime": 1.004, "eval_gooaq_pairs_samples_per_second": 127.484, "eval_gooaq_pairs_steps_per_second": 1.992, "step": 160 }, { "epoch": 0.5, "eval_paws-pos_loss": 0.025703923776745796, "eval_paws-pos_runtime": 0.6869, "eval_paws-pos_samples_per_second": 186.332, "eval_paws-pos_steps_per_second": 2.911, "step": 160 }, { "epoch": 0.503125, "grad_norm": 2.7484354972839355, "learning_rate": 3.9693546485281616e-05, "loss": 0.3874, "step": 161 }, { "epoch": 0.50625, "grad_norm": 3.9011173248291016, "learning_rate": 3.967476056079441e-05, "loss": 0.8134, "step": 162 }, { "epoch": 0.509375, "grad_norm": 3.723893642425537, "learning_rate": 3.9655422139866315e-05, "loss": 0.5596, "step": 163 }, { "epoch": 0.5125, "grad_norm": 1.8328720331192017, "learning_rate": 3.963553195057793e-05, "loss": 0.2877, "step": 164 }, { "epoch": 0.515625, "grad_norm": 2.9615490436553955, "learning_rate": 3.9615090741783634e-05, "loss": 0.5218, "step": 165 }, { "epoch": 0.51875, "grad_norm": 3.041154146194458, "learning_rate": 3.959409928308341e-05, "loss": 0.5282, "step": 166 }, { "epoch": 0.521875, "grad_norm": 3.439157247543335, "learning_rate": 3.957255836479377e-05, "loss": 0.7528, "step": 167 }, { "epoch": 0.525, "grad_norm": 3.576984405517578, "learning_rate": 3.955046879791816e-05, "loss": 0.7174, "step": 168 }, { "epoch": 0.528125, "grad_norm": 3.1042630672454834, "learning_rate": 3.952783141411626e-05, "loss": 0.6902, "step": 169 }, { "epoch": 0.53125, "grad_norm": 3.0211422443389893, "learning_rate": 3.9504647065672785e-05, "loss": 0.7486, "step": 170 }, { "epoch": 0.534375, "grad_norm": 3.5162508487701416, "learning_rate": 3.9480916625465344e-05, "loss": 0.6333, "step": 171 }, { "epoch": 0.5375, "grad_norm": 3.9070920944213867, "learning_rate": 3.9456640986931606e-05, "loss": 1.2932, "step": 172 }, { "epoch": 0.540625, "grad_norm": 3.548743724822998, "learning_rate": 3.943182106403563e-05, "loss": 0.6259, "step": 173 }, { "epoch": 0.54375, "grad_norm": 3.64949893951416, "learning_rate": 3.940645779123349e-05, "loss": 0.8357, "step": 174 }, { "epoch": 0.546875, "grad_norm": 2.4284133911132812, "learning_rate": 3.938055212343807e-05, "loss": 0.3604, "step": 175 }, { "epoch": 0.55, "grad_norm": 2.9141008853912354, "learning_rate": 3.9354105035983135e-05, "loss": 0.6598, "step": 176 }, { "epoch": 0.553125, "grad_norm": 2.0430235862731934, "learning_rate": 3.932711752458657e-05, "loss": 0.3169, "step": 177 }, { "epoch": 0.55625, "grad_norm": 3.522728204727173, "learning_rate": 3.929959060531291e-05, "loss": 0.8629, "step": 178 }, { "epoch": 0.559375, "grad_norm": 2.419400453567505, "learning_rate": 3.927152531453513e-05, "loss": 0.3648, "step": 179 }, { "epoch": 0.5625, "grad_norm": 2.826747417449951, "learning_rate": 3.924292270889555e-05, "loss": 0.5103, "step": 180 }, { "epoch": 0.565625, "grad_norm": 3.2149524688720703, "learning_rate": 3.921378386526612e-05, "loss": 0.6255, "step": 181 }, { "epoch": 0.56875, "grad_norm": 2.2112457752227783, "learning_rate": 3.918410988070782e-05, "loss": 0.4382, "step": 182 }, { "epoch": 0.571875, "grad_norm": 2.301940441131592, "learning_rate": 3.915390187242941e-05, "loss": 0.4647, "step": 183 }, { "epoch": 0.575, "grad_norm": 2.272001266479492, "learning_rate": 3.912316097774532e-05, "loss": 0.4218, "step": 184 }, { "epoch": 0.578125, "grad_norm": 3.77436900138855, "learning_rate": 3.909188835403285e-05, "loss": 0.8244, "step": 185 }, { "epoch": 0.58125, "grad_norm": 3.236813545227051, "learning_rate": 3.906008517868863e-05, "loss": 0.6579, "step": 186 }, { "epoch": 0.584375, "grad_norm": 3.1845405101776123, "learning_rate": 3.9027752649084215e-05, "loss": 0.8384, "step": 187 }, { "epoch": 0.5875, "grad_norm": 2.709747791290283, "learning_rate": 3.899489198252108e-05, "loss": 0.5266, "step": 188 }, { "epoch": 0.590625, "grad_norm": 2.5210235118865967, "learning_rate": 3.896150441618476e-05, "loss": 0.5079, "step": 189 }, { "epoch": 0.59375, "grad_norm": 1.9979658126831055, "learning_rate": 3.892759120709824e-05, "loss": 0.2574, "step": 190 }, { "epoch": 0.596875, "grad_norm": 2.4257137775421143, "learning_rate": 3.8893153632074675e-05, "loss": 0.4162, "step": 191 }, { "epoch": 0.6, "grad_norm": 3.482635021209717, "learning_rate": 3.88581929876693e-05, "loss": 0.7872, "step": 192 }, { "epoch": 0.6, "eval_VitaminC_cosine_accuracy": 0.564453125, "eval_VitaminC_cosine_accuracy_threshold": 0.737064003944397, "eval_VitaminC_cosine_ap": 0.5553950127875514, "eval_VitaminC_cosine_f1": 0.6666666666666666, "eval_VitaminC_cosine_f1_threshold": 0.312030553817749, "eval_VitaminC_cosine_precision": 0.5, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5625, "eval_VitaminC_dot_accuracy_threshold": 326.57232666015625, "eval_VitaminC_dot_ap": 0.5370581483003721, "eval_VitaminC_dot_f1": 0.6649006622516557, "eval_VitaminC_dot_f1_threshold": 116.00311279296875, "eval_VitaminC_dot_precision": 0.498015873015873, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.55859375, "eval_VitaminC_euclidean_accuracy_threshold": 13.492112159729004, "eval_VitaminC_euclidean_ap": 0.5536857778177137, "eval_VitaminC_euclidean_f1": 0.6657824933687002, "eval_VitaminC_euclidean_f1_threshold": 23.840118408203125, "eval_VitaminC_euclidean_precision": 0.4990059642147117, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5625, "eval_VitaminC_manhattan_accuracy_threshold": 306.6820983886719, "eval_VitaminC_manhattan_ap": 0.5520101545849081, "eval_VitaminC_manhattan_f1": 0.6666666666666666, "eval_VitaminC_manhattan_f1_threshold": 490.146728515625, "eval_VitaminC_manhattan_precision": 0.5, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.564453125, "eval_VitaminC_max_accuracy_threshold": 326.57232666015625, "eval_VitaminC_max_ap": 0.5553950127875514, "eval_VitaminC_max_f1": 0.6666666666666666, "eval_VitaminC_max_f1_threshold": 490.146728515625, "eval_VitaminC_max_precision": 0.5, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5553950127875514, "eval_sts-test_pearson_cosine": 0.8705183135475563, "eval_sts-test_pearson_dot": 0.8575572680200927, "eval_sts-test_pearson_euclidean": 0.894961141451468, "eval_sts-test_pearson_manhattan": 0.8946364485546632, "eval_sts-test_pearson_max": 0.894961141451468, "eval_sts-test_spearman_cosine": 0.8981581293842179, "eval_sts-test_spearman_dot": 0.8574014998383989, "eval_sts-test_spearman_euclidean": 0.8924189591158167, "eval_sts-test_spearman_manhattan": 0.8920942887144219, "eval_sts-test_spearman_max": 0.8981581293842179, "eval_vitaminc-pairs_loss": 2.066204786300659, "eval_vitaminc-pairs_runtime": 1.8428, "eval_vitaminc-pairs_samples_per_second": 58.608, "eval_vitaminc-pairs_steps_per_second": 1.085, "step": 192 }, { "epoch": 0.6, "eval_negation-triplets_loss": 0.763123095035553, "eval_negation-triplets_runtime": 0.297, "eval_negation-triplets_samples_per_second": 215.511, "eval_negation-triplets_steps_per_second": 3.367, "step": 192 }, { "epoch": 0.6, "eval_scitail-pairs-pos_loss": 0.07364190369844437, "eval_scitail-pairs-pos_runtime": 0.3662, "eval_scitail-pairs-pos_samples_per_second": 147.451, "eval_scitail-pairs-pos_steps_per_second": 2.731, "step": 192 }, { "epoch": 0.6, "eval_xsum-pairs_loss": 0.06735075265169144, "eval_xsum-pairs_runtime": 2.8409, "eval_xsum-pairs_samples_per_second": 45.056, "eval_xsum-pairs_steps_per_second": 0.704, "step": 192 }, { "epoch": 0.6, "eval_sciq_pairs_loss": 0.01930728368461132, "eval_sciq_pairs_runtime": 3.6003, "eval_sciq_pairs_samples_per_second": 35.552, "eval_sciq_pairs_steps_per_second": 0.556, "step": 192 }, { "epoch": 0.6, "eval_qasc_pairs_loss": 0.11278136074542999, "eval_qasc_pairs_runtime": 0.5997, "eval_qasc_pairs_samples_per_second": 213.437, "eval_qasc_pairs_steps_per_second": 3.335, "step": 192 }, { "epoch": 0.6, "eval_openbookqa_pairs_loss": 0.7505559921264648, "eval_openbookqa_pairs_runtime": 0.5774, "eval_openbookqa_pairs_samples_per_second": 221.691, "eval_openbookqa_pairs_steps_per_second": 3.464, "step": 192 }, { "epoch": 0.6, "eval_msmarco_pairs_loss": 0.33166375756263733, "eval_msmarco_pairs_runtime": 1.4619, "eval_msmarco_pairs_samples_per_second": 87.558, "eval_msmarco_pairs_steps_per_second": 1.368, "step": 192 }, { "epoch": 0.6, "eval_nq_pairs_loss": 0.21051406860351562, "eval_nq_pairs_runtime": 2.858, "eval_nq_pairs_samples_per_second": 44.786, "eval_nq_pairs_steps_per_second": 0.7, "step": 192 }, { "epoch": 0.6, "eval_trivia_pairs_loss": 0.7072564363479614, "eval_trivia_pairs_runtime": 4.3854, "eval_trivia_pairs_samples_per_second": 29.187, "eval_trivia_pairs_steps_per_second": 0.456, "step": 192 }, { "epoch": 0.6, "eval_gooaq_pairs_loss": 0.3748788833618164, "eval_gooaq_pairs_runtime": 1.0024, "eval_gooaq_pairs_samples_per_second": 127.692, "eval_gooaq_pairs_steps_per_second": 1.995, "step": 192 }, { "epoch": 0.6, "eval_paws-pos_loss": 0.025185449048876762, "eval_paws-pos_runtime": 0.6844, "eval_paws-pos_samples_per_second": 187.016, "eval_paws-pos_steps_per_second": 2.922, "step": 192 } ], "logging_steps": 1, "max_steps": 640, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 64, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 320, "trial_name": null, "trial_params": null }