|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 45, |
|
"global_step": 445, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022471910112359553, |
|
"grad_norm": 3.9492883682250977, |
|
"learning_rate": 1.9662921348314604e-07, |
|
"loss": 0.8103, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0044943820224719105, |
|
"grad_norm": 4.117438793182373, |
|
"learning_rate": 3.932584269662921e-07, |
|
"loss": 0.8803, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006741573033707865, |
|
"grad_norm": 3.809002161026001, |
|
"learning_rate": 5.898876404494381e-07, |
|
"loss": 0.8219, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008988764044943821, |
|
"grad_norm": 0.7417504787445068, |
|
"learning_rate": 7.865168539325842e-07, |
|
"loss": 0.0574, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.011235955056179775, |
|
"grad_norm": 2.707460403442383, |
|
"learning_rate": 9.831460674157302e-07, |
|
"loss": 0.3044, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01348314606741573, |
|
"grad_norm": 3.082705497741699, |
|
"learning_rate": 1.1797752808988763e-06, |
|
"loss": 0.3306, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.015730337078651686, |
|
"grad_norm": 3.102416753768921, |
|
"learning_rate": 1.3764044943820223e-06, |
|
"loss": 0.759, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.017977528089887642, |
|
"grad_norm": 0.6271047592163086, |
|
"learning_rate": 1.5730337078651683e-06, |
|
"loss": 0.0472, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.020224719101123594, |
|
"grad_norm": 3.1362593173980713, |
|
"learning_rate": 1.7696629213483144e-06, |
|
"loss": 0.7782, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02247191011235955, |
|
"grad_norm": 1.124997615814209, |
|
"learning_rate": 1.9662921348314604e-06, |
|
"loss": 0.0757, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024719101123595506, |
|
"grad_norm": 3.194413185119629, |
|
"learning_rate": 2.1629213483146067e-06, |
|
"loss": 0.7778, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02696629213483146, |
|
"grad_norm": 3.966202974319458, |
|
"learning_rate": 2.3595505617977525e-06, |
|
"loss": 0.7111, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.029213483146067417, |
|
"grad_norm": 3.63393235206604, |
|
"learning_rate": 2.5561797752808988e-06, |
|
"loss": 0.6598, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03146067415730337, |
|
"grad_norm": 4.087065696716309, |
|
"learning_rate": 2.7528089887640446e-06, |
|
"loss": 0.8901, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.033707865168539325, |
|
"grad_norm": 2.769573211669922, |
|
"learning_rate": 2.949438202247191e-06, |
|
"loss": 0.3206, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.035955056179775284, |
|
"grad_norm": 2.630620002746582, |
|
"learning_rate": 3.1460674157303367e-06, |
|
"loss": 0.3408, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.038202247191011236, |
|
"grad_norm": 2.9570937156677246, |
|
"learning_rate": 3.342696629213483e-06, |
|
"loss": 0.5623, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04044943820224719, |
|
"grad_norm": 1.0999970436096191, |
|
"learning_rate": 3.5393258426966288e-06, |
|
"loss": 0.0758, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04269662921348315, |
|
"grad_norm": 5.516472816467285, |
|
"learning_rate": 3.735955056179775e-06, |
|
"loss": 0.994, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0449438202247191, |
|
"grad_norm": 6.245299816131592, |
|
"learning_rate": 3.932584269662921e-06, |
|
"loss": 2.4196, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04719101123595506, |
|
"grad_norm": 0.546605110168457, |
|
"learning_rate": 4.129213483146067e-06, |
|
"loss": 0.0561, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04943820224719101, |
|
"grad_norm": 0.7049635648727417, |
|
"learning_rate": 4.325842696629213e-06, |
|
"loss": 0.0827, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.051685393258426963, |
|
"grad_norm": 3.1022439002990723, |
|
"learning_rate": 4.522471910112359e-06, |
|
"loss": 0.7405, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.05393258426966292, |
|
"grad_norm": 4.534759044647217, |
|
"learning_rate": 4.719101123595505e-06, |
|
"loss": 0.9656, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.056179775280898875, |
|
"grad_norm": 3.0486032962799072, |
|
"learning_rate": 4.915730337078652e-06, |
|
"loss": 0.7855, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.058426966292134834, |
|
"grad_norm": 3.7457478046417236, |
|
"learning_rate": 5.1123595505617975e-06, |
|
"loss": 0.6349, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.060674157303370786, |
|
"grad_norm": 3.2051479816436768, |
|
"learning_rate": 5.308988764044943e-06, |
|
"loss": 0.8087, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06292134831460675, |
|
"grad_norm": 4.389094829559326, |
|
"learning_rate": 5.505617977528089e-06, |
|
"loss": 0.9282, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0651685393258427, |
|
"grad_norm": 2.920410394668579, |
|
"learning_rate": 5.702247191011236e-06, |
|
"loss": 0.3377, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06741573033707865, |
|
"grad_norm": 2.7193148136138916, |
|
"learning_rate": 5.898876404494382e-06, |
|
"loss": 0.3289, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0696629213483146, |
|
"grad_norm": 4.0008225440979, |
|
"learning_rate": 6.0955056179775275e-06, |
|
"loss": 0.6314, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07191011235955057, |
|
"grad_norm": 0.5842159390449524, |
|
"learning_rate": 6.292134831460673e-06, |
|
"loss": 0.0611, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07415730337078652, |
|
"grad_norm": 3.1256043910980225, |
|
"learning_rate": 6.48876404494382e-06, |
|
"loss": 0.8942, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07640449438202247, |
|
"grad_norm": 0.9526051878929138, |
|
"learning_rate": 6.685393258426966e-06, |
|
"loss": 0.0701, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07865168539325842, |
|
"grad_norm": 4.061926364898682, |
|
"learning_rate": 6.882022471910112e-06, |
|
"loss": 0.8506, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08089887640449438, |
|
"grad_norm": 2.8898491859436035, |
|
"learning_rate": 7.0786516853932575e-06, |
|
"loss": 0.3386, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08314606741573034, |
|
"grad_norm": 0.9806709289550781, |
|
"learning_rate": 7.275280898876404e-06, |
|
"loss": 0.0701, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0853932584269663, |
|
"grad_norm": 3.8004391193389893, |
|
"learning_rate": 7.47191011235955e-06, |
|
"loss": 0.8042, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08764044943820225, |
|
"grad_norm": 4.089083194732666, |
|
"learning_rate": 7.668539325842697e-06, |
|
"loss": 0.8744, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0898876404494382, |
|
"grad_norm": 3.419440984725952, |
|
"learning_rate": 7.865168539325842e-06, |
|
"loss": 0.8644, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09213483146067415, |
|
"grad_norm": 4.094921588897705, |
|
"learning_rate": 8.061797752808988e-06, |
|
"loss": 0.8647, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.09438202247191012, |
|
"grad_norm": 3.9199764728546143, |
|
"learning_rate": 8.258426966292133e-06, |
|
"loss": 0.7916, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09662921348314607, |
|
"grad_norm": 4.082360744476318, |
|
"learning_rate": 8.45505617977528e-06, |
|
"loss": 0.8599, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09887640449438202, |
|
"grad_norm": 0.6443855166435242, |
|
"learning_rate": 8.651685393258427e-06, |
|
"loss": 0.0523, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"grad_norm": 4.051048278808594, |
|
"learning_rate": 8.848314606741572e-06, |
|
"loss": 0.6968, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_VitaminC_cosine_accuracy": 0.55859375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8300318121910095, |
|
"eval_VitaminC_cosine_ap": 0.5514483751609435, |
|
"eval_VitaminC_cosine_f1": 0.6657718120805369, |
|
"eval_VitaminC_cosine_f1_threshold": 0.37456807494163513, |
|
"eval_VitaminC_cosine_precision": 0.5020242914979757, |
|
"eval_VitaminC_cosine_recall": 0.9880478087649402, |
|
"eval_VitaminC_dot_accuracy": 0.5546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 311.380615234375, |
|
"eval_VitaminC_dot_ap": 0.5333497363350208, |
|
"eval_VitaminC_dot_f1": 0.6684709066305818, |
|
"eval_VitaminC_dot_f1_threshold": 144.8927001953125, |
|
"eval_VitaminC_dot_precision": 0.5061475409836066, |
|
"eval_VitaminC_dot_recall": 0.9840637450199203, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 11.028482437133789, |
|
"eval_VitaminC_euclidean_ap": 0.5544340410314673, |
|
"eval_VitaminC_euclidean_f1": 0.6649006622516557, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.38451385498047, |
|
"eval_VitaminC_euclidean_precision": 0.498015873015873, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.556640625, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 232.38790893554688, |
|
"eval_VitaminC_manhattan_ap": 0.5515569514532939, |
|
"eval_VitaminC_manhattan_f1": 0.6649006622516557, |
|
"eval_VitaminC_manhattan_f1_threshold": 498.126220703125, |
|
"eval_VitaminC_manhattan_precision": 0.498015873015873, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 311.380615234375, |
|
"eval_VitaminC_max_ap": 0.5544340410314673, |
|
"eval_VitaminC_max_f1": 0.6684709066305818, |
|
"eval_VitaminC_max_f1_threshold": 498.126220703125, |
|
"eval_VitaminC_max_precision": 0.5061475409836066, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5544340410314673, |
|
"eval_sts-test_pearson_cosine": 0.8803067271464453, |
|
"eval_sts-test_pearson_dot": 0.8698285291814508, |
|
"eval_sts-test_pearson_euclidean": 0.9023937835918766, |
|
"eval_sts-test_pearson_manhattan": 0.9020751259156048, |
|
"eval_sts-test_pearson_max": 0.9023937835918766, |
|
"eval_sts-test_spearman_cosine": 0.9038005474254912, |
|
"eval_sts-test_spearman_dot": 0.8707897794601254, |
|
"eval_sts-test_spearman_euclidean": 0.8989733631129851, |
|
"eval_sts-test_spearman_manhattan": 0.8980189529612906, |
|
"eval_sts-test_spearman_max": 0.9038005474254912, |
|
"eval_vitaminc-pairs_loss": 1.7273772954940796, |
|
"eval_vitaminc-pairs_runtime": 1.8924, |
|
"eval_vitaminc-pairs_samples_per_second": 57.071, |
|
"eval_vitaminc-pairs_steps_per_second": 1.057, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_negation-triplets_loss": 0.9174526929855347, |
|
"eval_negation-triplets_runtime": 0.2972, |
|
"eval_negation-triplets_samples_per_second": 215.314, |
|
"eval_negation-triplets_steps_per_second": 3.364, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_scitail-pairs-pos_loss": 0.07368183881044388, |
|
"eval_scitail-pairs-pos_runtime": 0.379, |
|
"eval_scitail-pairs-pos_samples_per_second": 142.492, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.639, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_scitail-pairs-qa_loss": 0.001584450714290142, |
|
"eval_scitail-pairs-qa_runtime": 0.5178, |
|
"eval_scitail-pairs-qa_samples_per_second": 247.198, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.862, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_xsum-pairs_loss": 0.038235221058130264, |
|
"eval_xsum-pairs_runtime": 2.7268, |
|
"eval_xsum-pairs_samples_per_second": 46.941, |
|
"eval_xsum-pairs_steps_per_second": 0.733, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_sciq_pairs_loss": 0.01538097020238638, |
|
"eval_sciq_pairs_runtime": 2.7808, |
|
"eval_sciq_pairs_samples_per_second": 46.029, |
|
"eval_sciq_pairs_steps_per_second": 0.719, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_qasc_pairs_loss": 0.09078988432884216, |
|
"eval_qasc_pairs_runtime": 0.6473, |
|
"eval_qasc_pairs_samples_per_second": 197.758, |
|
"eval_qasc_pairs_steps_per_second": 3.09, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_openbookqa_pairs_loss": 0.6754768490791321, |
|
"eval_openbookqa_pairs_runtime": 0.573, |
|
"eval_openbookqa_pairs_samples_per_second": 223.397, |
|
"eval_openbookqa_pairs_steps_per_second": 3.491, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_msmarco_pairs_loss": 0.15991328656673431, |
|
"eval_msmarco_pairs_runtime": 1.487, |
|
"eval_msmarco_pairs_samples_per_second": 86.078, |
|
"eval_msmarco_pairs_steps_per_second": 1.345, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_nq_pairs_loss": 0.09591890126466751, |
|
"eval_nq_pairs_runtime": 2.3943, |
|
"eval_nq_pairs_samples_per_second": 53.459, |
|
"eval_nq_pairs_steps_per_second": 0.835, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_trivia_pairs_loss": 0.5305934548377991, |
|
"eval_trivia_pairs_runtime": 3.5752, |
|
"eval_trivia_pairs_samples_per_second": 35.802, |
|
"eval_trivia_pairs_steps_per_second": 0.559, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_gooaq_pairs_loss": 0.29681000113487244, |
|
"eval_gooaq_pairs_runtime": 0.9087, |
|
"eval_gooaq_pairs_samples_per_second": 140.861, |
|
"eval_gooaq_pairs_steps_per_second": 2.201, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10112359550561797, |
|
"eval_paws-pos_loss": 0.024501051753759384, |
|
"eval_paws-pos_runtime": 0.6773, |
|
"eval_paws-pos_samples_per_second": 188.996, |
|
"eval_paws-pos_steps_per_second": 2.953, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10337078651685393, |
|
"grad_norm": 2.9021923542022705, |
|
"learning_rate": 9.044943820224718e-06, |
|
"loss": 0.3376, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.10561797752808989, |
|
"grad_norm": 3.179288625717163, |
|
"learning_rate": 9.241573033707863e-06, |
|
"loss": 0.5174, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.10786516853932585, |
|
"grad_norm": 3.1919493675231934, |
|
"learning_rate": 9.43820224719101e-06, |
|
"loss": 0.8162, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.1101123595505618, |
|
"grad_norm": 2.8602521419525146, |
|
"learning_rate": 9.634831460674157e-06, |
|
"loss": 0.3545, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11235955056179775, |
|
"grad_norm": 2.7570478916168213, |
|
"learning_rate": 9.831460674157303e-06, |
|
"loss": 0.315, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1146067415730337, |
|
"grad_norm": 0.8641514778137207, |
|
"learning_rate": 1.0028089887640448e-05, |
|
"loss": 0.0627, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11685393258426967, |
|
"grad_norm": 3.9437484741210938, |
|
"learning_rate": 1.0224719101123595e-05, |
|
"loss": 0.8851, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11910112359550562, |
|
"grad_norm": 4.144773006439209, |
|
"learning_rate": 1.042134831460674e-05, |
|
"loss": 0.8382, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.12134831460674157, |
|
"grad_norm": 4.277736186981201, |
|
"learning_rate": 1.0617977528089887e-05, |
|
"loss": 0.733, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12359550561797752, |
|
"grad_norm": 4.025904178619385, |
|
"learning_rate": 1.0814606741573032e-05, |
|
"loss": 0.7173, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1258426966292135, |
|
"grad_norm": 3.923046827316284, |
|
"learning_rate": 1.1011235955056178e-05, |
|
"loss": 0.7659, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.12808988764044943, |
|
"grad_norm": 3.2707138061523438, |
|
"learning_rate": 1.1207865168539325e-05, |
|
"loss": 0.793, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1303370786516854, |
|
"grad_norm": 3.1660959720611572, |
|
"learning_rate": 1.1404494382022472e-05, |
|
"loss": 0.5426, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.13258426966292136, |
|
"grad_norm": 4.5236663818359375, |
|
"learning_rate": 1.1601123595505617e-05, |
|
"loss": 0.7641, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1348314606741573, |
|
"grad_norm": 0.5771021246910095, |
|
"learning_rate": 1.1797752808988763e-05, |
|
"loss": 0.0657, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13707865168539327, |
|
"grad_norm": 3.8541343212127686, |
|
"learning_rate": 1.1994382022471908e-05, |
|
"loss": 0.7836, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1393258426966292, |
|
"grad_norm": 4.284148693084717, |
|
"learning_rate": 1.2191011235955055e-05, |
|
"loss": 0.9306, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.14157303370786517, |
|
"grad_norm": 4.175032615661621, |
|
"learning_rate": 1.23876404494382e-05, |
|
"loss": 0.8673, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.14382022471910114, |
|
"grad_norm": 5.025452136993408, |
|
"learning_rate": 1.2584269662921347e-05, |
|
"loss": 0.9296, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.14606741573033707, |
|
"grad_norm": 3.970745086669922, |
|
"learning_rate": 1.2780898876404493e-05, |
|
"loss": 0.8211, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14831460674157304, |
|
"grad_norm": 3.150197744369507, |
|
"learning_rate": 1.297752808988764e-05, |
|
"loss": 0.7685, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.15056179775280898, |
|
"grad_norm": 4.280994415283203, |
|
"learning_rate": 1.3174157303370785e-05, |
|
"loss": 0.7139, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.15280898876404495, |
|
"grad_norm": 4.288730621337891, |
|
"learning_rate": 1.3370786516853932e-05, |
|
"loss": 0.8241, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1550561797752809, |
|
"grad_norm": 3.7402424812316895, |
|
"learning_rate": 1.3567415730337077e-05, |
|
"loss": 0.6256, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.15730337078651685, |
|
"grad_norm": 4.478890895843506, |
|
"learning_rate": 1.3764044943820223e-05, |
|
"loss": 0.8842, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15955056179775282, |
|
"grad_norm": 3.8147876262664795, |
|
"learning_rate": 1.3960674157303368e-05, |
|
"loss": 0.804, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.16179775280898875, |
|
"grad_norm": 0.7314035296440125, |
|
"learning_rate": 1.4157303370786515e-05, |
|
"loss": 0.0989, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.16404494382022472, |
|
"grad_norm": 3.074303150177002, |
|
"learning_rate": 1.4353932584269662e-05, |
|
"loss": 0.332, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1662921348314607, |
|
"grad_norm": 3.414987325668335, |
|
"learning_rate": 1.4550561797752808e-05, |
|
"loss": 0.5736, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16853932584269662, |
|
"grad_norm": 3.7946674823760986, |
|
"learning_rate": 1.4747191011235953e-05, |
|
"loss": 0.8285, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1707865168539326, |
|
"grad_norm": 4.310474395751953, |
|
"learning_rate": 1.49438202247191e-05, |
|
"loss": 0.9561, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.17303370786516853, |
|
"grad_norm": 0.9791378974914551, |
|
"learning_rate": 1.5140449438202245e-05, |
|
"loss": 0.0633, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1752808988764045, |
|
"grad_norm": 0.6351795196533203, |
|
"learning_rate": 1.5337078651685393e-05, |
|
"loss": 0.0848, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.17752808988764046, |
|
"grad_norm": 3.4832303524017334, |
|
"learning_rate": 1.553370786516854e-05, |
|
"loss": 0.8325, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.1797752808988764, |
|
"grad_norm": 5.115800380706787, |
|
"learning_rate": 1.5730337078651683e-05, |
|
"loss": 1.0011, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18202247191011237, |
|
"grad_norm": 3.552396297454834, |
|
"learning_rate": 1.592696629213483e-05, |
|
"loss": 0.8697, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.1842696629213483, |
|
"grad_norm": 4.491541862487793, |
|
"learning_rate": 1.6123595505617977e-05, |
|
"loss": 0.8344, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.18651685393258427, |
|
"grad_norm": 4.73278284072876, |
|
"learning_rate": 1.6320224719101122e-05, |
|
"loss": 0.9967, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.18876404494382024, |
|
"grad_norm": 2.994192123413086, |
|
"learning_rate": 1.6516853932584267e-05, |
|
"loss": 0.4638, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.19101123595505617, |
|
"grad_norm": 4.142394542694092, |
|
"learning_rate": 1.6713483146067415e-05, |
|
"loss": 0.8994, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19325842696629214, |
|
"grad_norm": 4.149839401245117, |
|
"learning_rate": 1.691011235955056e-05, |
|
"loss": 0.7789, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.19550561797752808, |
|
"grad_norm": 0.45795938372612, |
|
"learning_rate": 1.7106741573033705e-05, |
|
"loss": 0.0555, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.19775280898876405, |
|
"grad_norm": 3.4293618202209473, |
|
"learning_rate": 1.7303370786516853e-05, |
|
"loss": 0.3778, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.041529655456543, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.708, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"grad_norm": 0.6160458922386169, |
|
"learning_rate": 1.7696629213483143e-05, |
|
"loss": 0.0689, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_VitaminC_cosine_accuracy": 0.556640625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.74173903465271, |
|
"eval_VitaminC_cosine_ap": 0.5513770735348443, |
|
"eval_VitaminC_cosine_f1": 0.6675531914893617, |
|
"eval_VitaminC_cosine_f1_threshold": 0.32480987906455994, |
|
"eval_VitaminC_cosine_precision": 0.500998003992016, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.560546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 297.664794921875, |
|
"eval_VitaminC_dot_ap": 0.5340088824099496, |
|
"eval_VitaminC_dot_f1": 0.6666666666666667, |
|
"eval_VitaminC_dot_f1_threshold": 126.67618560791016, |
|
"eval_VitaminC_dot_precision": 0.501002004008016, |
|
"eval_VitaminC_dot_recall": 0.9960159362549801, |
|
"eval_VitaminC_euclidean_accuracy": 0.55859375, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 14.345688819885254, |
|
"eval_VitaminC_euclidean_ap": 0.5542145004976253, |
|
"eval_VitaminC_euclidean_f1": 0.6675531914893617, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.381019592285156, |
|
"eval_VitaminC_euclidean_precision": 0.500998003992016, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.552734375, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 232.7296142578125, |
|
"eval_VitaminC_manhattan_ap": 0.5523953693907266, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 496.4290466308594, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.560546875, |
|
"eval_VitaminC_max_accuracy_threshold": 297.664794921875, |
|
"eval_VitaminC_max_ap": 0.5542145004976253, |
|
"eval_VitaminC_max_f1": 0.6675531914893617, |
|
"eval_VitaminC_max_f1_threshold": 496.4290466308594, |
|
"eval_VitaminC_max_precision": 0.501002004008016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5542145004976253, |
|
"eval_sts-test_pearson_cosine": 0.8800782580988616, |
|
"eval_sts-test_pearson_dot": 0.8687642290872662, |
|
"eval_sts-test_pearson_euclidean": 0.9034088230546415, |
|
"eval_sts-test_pearson_manhattan": 0.9030146212284895, |
|
"eval_sts-test_pearson_max": 0.9034088230546415, |
|
"eval_sts-test_spearman_cosine": 0.904560289590133, |
|
"eval_sts-test_spearman_dot": 0.8705944849554133, |
|
"eval_sts-test_spearman_euclidean": 0.8998959103665689, |
|
"eval_sts-test_spearman_manhattan": 0.8995891404697307, |
|
"eval_sts-test_spearman_max": 0.904560289590133, |
|
"eval_vitaminc-pairs_loss": 1.6141985654830933, |
|
"eval_vitaminc-pairs_runtime": 1.864, |
|
"eval_vitaminc-pairs_samples_per_second": 57.94, |
|
"eval_vitaminc-pairs_steps_per_second": 1.073, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_negation-triplets_loss": 0.9220322370529175, |
|
"eval_negation-triplets_runtime": 0.3199, |
|
"eval_negation-triplets_samples_per_second": 200.043, |
|
"eval_negation-triplets_steps_per_second": 3.126, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_scitail-pairs-pos_loss": 0.0654294565320015, |
|
"eval_scitail-pairs-pos_runtime": 0.4625, |
|
"eval_scitail-pairs-pos_samples_per_second": 116.76, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.162, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_scitail-pairs-qa_loss": 0.0015887805493548512, |
|
"eval_scitail-pairs-qa_runtime": 0.5768, |
|
"eval_scitail-pairs-qa_samples_per_second": 221.899, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.467, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_xsum-pairs_loss": 0.03991687670350075, |
|
"eval_xsum-pairs_runtime": 2.7403, |
|
"eval_xsum-pairs_samples_per_second": 46.71, |
|
"eval_xsum-pairs_steps_per_second": 0.73, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_sciq_pairs_loss": 0.01584962010383606, |
|
"eval_sciq_pairs_runtime": 2.8429, |
|
"eval_sciq_pairs_samples_per_second": 45.024, |
|
"eval_sciq_pairs_steps_per_second": 0.703, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_qasc_pairs_loss": 0.09112343192100525, |
|
"eval_qasc_pairs_runtime": 0.6492, |
|
"eval_qasc_pairs_samples_per_second": 197.154, |
|
"eval_qasc_pairs_steps_per_second": 3.081, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_openbookqa_pairs_loss": 0.7132729887962341, |
|
"eval_openbookqa_pairs_runtime": 0.5847, |
|
"eval_openbookqa_pairs_samples_per_second": 218.922, |
|
"eval_openbookqa_pairs_steps_per_second": 3.421, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_msmarco_pairs_loss": 0.15173853933811188, |
|
"eval_msmarco_pairs_runtime": 1.4966, |
|
"eval_msmarco_pairs_samples_per_second": 85.527, |
|
"eval_msmarco_pairs_steps_per_second": 1.336, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_nq_pairs_loss": 0.09653442353010178, |
|
"eval_nq_pairs_runtime": 2.3749, |
|
"eval_nq_pairs_samples_per_second": 53.897, |
|
"eval_nq_pairs_steps_per_second": 0.842, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_trivia_pairs_loss": 0.5191965699195862, |
|
"eval_trivia_pairs_runtime": 3.6006, |
|
"eval_trivia_pairs_samples_per_second": 35.55, |
|
"eval_trivia_pairs_steps_per_second": 0.555, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_gooaq_pairs_loss": 0.30713126063346863, |
|
"eval_gooaq_pairs_runtime": 0.9131, |
|
"eval_gooaq_pairs_samples_per_second": 140.178, |
|
"eval_gooaq_pairs_steps_per_second": 2.19, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20224719101123595, |
|
"eval_paws-pos_loss": 0.024471310898661613, |
|
"eval_paws-pos_runtime": 0.6872, |
|
"eval_paws-pos_samples_per_second": 186.254, |
|
"eval_paws-pos_steps_per_second": 2.91, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20449438202247192, |
|
"grad_norm": 6.209661483764648, |
|
"learning_rate": 1.7893258426966292e-05, |
|
"loss": 2.3489, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.20674157303370785, |
|
"grad_norm": 3.1821141242980957, |
|
"learning_rate": 1.8089887640449437e-05, |
|
"loss": 0.741, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.20898876404494382, |
|
"grad_norm": 3.871994972229004, |
|
"learning_rate": 1.8286516853932585e-05, |
|
"loss": 0.7729, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.21123595505617979, |
|
"grad_norm": 0.5280765891075134, |
|
"learning_rate": 1.8483146067415727e-05, |
|
"loss": 0.0631, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.21348314606741572, |
|
"grad_norm": 4.475915431976318, |
|
"learning_rate": 1.8679775280898875e-05, |
|
"loss": 0.9342, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2157303370786517, |
|
"grad_norm": 3.949381113052368, |
|
"learning_rate": 1.887640449438202e-05, |
|
"loss": 0.8581, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.21797752808988763, |
|
"grad_norm": 2.910426616668701, |
|
"learning_rate": 1.907303370786517e-05, |
|
"loss": 0.5198, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2202247191011236, |
|
"grad_norm": 4.028941631317139, |
|
"learning_rate": 1.9269662921348313e-05, |
|
"loss": 0.846, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.22247191011235956, |
|
"grad_norm": 4.183433532714844, |
|
"learning_rate": 1.946629213483146e-05, |
|
"loss": 0.6581, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2247191011235955, |
|
"grad_norm": 3.348114252090454, |
|
"learning_rate": 1.9662921348314607e-05, |
|
"loss": 0.3579, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22696629213483147, |
|
"grad_norm": 4.055211544036865, |
|
"learning_rate": 1.9859550561797752e-05, |
|
"loss": 0.908, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2292134831460674, |
|
"grad_norm": 1.0024710893630981, |
|
"learning_rate": 2.0056179775280897e-05, |
|
"loss": 0.0664, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.23146067415730337, |
|
"grad_norm": 3.582249641418457, |
|
"learning_rate": 2.0252808988764042e-05, |
|
"loss": 0.5411, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.23370786516853934, |
|
"grad_norm": 4.226349830627441, |
|
"learning_rate": 2.044943820224719e-05, |
|
"loss": 0.9163, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.23595505617977527, |
|
"grad_norm": 3.002727508544922, |
|
"learning_rate": 2.0646067415730335e-05, |
|
"loss": 0.7975, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23820224719101124, |
|
"grad_norm": 3.5497515201568604, |
|
"learning_rate": 2.084269662921348e-05, |
|
"loss": 0.37, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.24044943820224718, |
|
"grad_norm": 4.381045341491699, |
|
"learning_rate": 2.103932584269663e-05, |
|
"loss": 0.8495, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.24269662921348314, |
|
"grad_norm": 3.926840305328369, |
|
"learning_rate": 2.1235955056179773e-05, |
|
"loss": 0.8073, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2449438202247191, |
|
"grad_norm": 3.0835390090942383, |
|
"learning_rate": 2.1432584269662922e-05, |
|
"loss": 0.7563, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.24719101123595505, |
|
"grad_norm": 4.230669975280762, |
|
"learning_rate": 2.1629213483146063e-05, |
|
"loss": 0.6585, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24943820224719102, |
|
"grad_norm": 2.8849070072174072, |
|
"learning_rate": 2.1825842696629212e-05, |
|
"loss": 0.3246, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.251685393258427, |
|
"grad_norm": 4.796951770782471, |
|
"learning_rate": 2.2022471910112357e-05, |
|
"loss": 0.9718, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2539325842696629, |
|
"grad_norm": 4.60318660736084, |
|
"learning_rate": 2.2219101123595505e-05, |
|
"loss": 0.8584, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.25617977528089886, |
|
"grad_norm": 3.098703384399414, |
|
"learning_rate": 2.241573033707865e-05, |
|
"loss": 0.3385, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.25842696629213485, |
|
"grad_norm": 2.9519224166870117, |
|
"learning_rate": 2.2612359550561795e-05, |
|
"loss": 0.323, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2606741573033708, |
|
"grad_norm": 2.913742780685425, |
|
"learning_rate": 2.2808988764044944e-05, |
|
"loss": 0.3359, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.26292134831460673, |
|
"grad_norm": 4.148440837860107, |
|
"learning_rate": 2.300561797752809e-05, |
|
"loss": 0.6955, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2651685393258427, |
|
"grad_norm": 0.8463248610496521, |
|
"learning_rate": 2.3202247191011234e-05, |
|
"loss": 0.0539, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.26741573033707866, |
|
"grad_norm": 0.7284589409828186, |
|
"learning_rate": 2.339887640449438e-05, |
|
"loss": 0.0507, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.2696629213483146, |
|
"grad_norm": 3.615086317062378, |
|
"learning_rate": 2.3595505617977527e-05, |
|
"loss": 0.314, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27191011235955054, |
|
"grad_norm": 5.229820728302002, |
|
"learning_rate": 2.3792134831460672e-05, |
|
"loss": 1.0339, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.27415730337078653, |
|
"grad_norm": 3.6847782135009766, |
|
"learning_rate": 2.3988764044943817e-05, |
|
"loss": 0.3158, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.27640449438202247, |
|
"grad_norm": 4.280517578125, |
|
"learning_rate": 2.4185393258426965e-05, |
|
"loss": 0.7809, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2786516853932584, |
|
"grad_norm": 4.476150035858154, |
|
"learning_rate": 2.438202247191011e-05, |
|
"loss": 0.9516, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2808988764044944, |
|
"grad_norm": 2.7380239963531494, |
|
"learning_rate": 2.457865168539326e-05, |
|
"loss": 0.3117, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28314606741573034, |
|
"grad_norm": 3.9667162895202637, |
|
"learning_rate": 2.47752808988764e-05, |
|
"loss": 0.8366, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.2853932584269663, |
|
"grad_norm": 4.552999019622803, |
|
"learning_rate": 2.497191011235955e-05, |
|
"loss": 0.8033, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.2876404494382023, |
|
"grad_norm": 3.4238576889038086, |
|
"learning_rate": 2.5168539325842694e-05, |
|
"loss": 0.7253, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2898876404494382, |
|
"grad_norm": 4.677807331085205, |
|
"learning_rate": 2.5365168539325842e-05, |
|
"loss": 0.8345, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.29213483146067415, |
|
"grad_norm": 4.282113075256348, |
|
"learning_rate": 2.5561797752808987e-05, |
|
"loss": 0.7532, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2943820224719101, |
|
"grad_norm": 4.375221252441406, |
|
"learning_rate": 2.5758426966292132e-05, |
|
"loss": 0.8247, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2966292134831461, |
|
"grad_norm": 3.2591633796691895, |
|
"learning_rate": 2.595505617977528e-05, |
|
"loss": 0.5175, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.298876404494382, |
|
"grad_norm": 4.146636962890625, |
|
"learning_rate": 2.6151685393258425e-05, |
|
"loss": 0.7813, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.30112359550561796, |
|
"grad_norm": 4.2413249015808105, |
|
"learning_rate": 2.634831460674157e-05, |
|
"loss": 0.6582, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"grad_norm": 4.541455268859863, |
|
"learning_rate": 2.6544943820224715e-05, |
|
"loss": 0.3484, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_VitaminC_cosine_accuracy": 0.560546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7956135272979736, |
|
"eval_VitaminC_cosine_ap": 0.5505565383154402, |
|
"eval_VitaminC_cosine_f1": 0.6684709066305818, |
|
"eval_VitaminC_cosine_f1_threshold": 0.40466147661209106, |
|
"eval_VitaminC_cosine_precision": 0.5061475409836066, |
|
"eval_VitaminC_cosine_recall": 0.9840637450199203, |
|
"eval_VitaminC_dot_accuracy": 0.5546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 312.2774658203125, |
|
"eval_VitaminC_dot_ap": 0.5365135091766033, |
|
"eval_VitaminC_dot_f1": 0.6684856753069577, |
|
"eval_VitaminC_dot_f1_threshold": 157.33203125, |
|
"eval_VitaminC_dot_precision": 0.508298755186722, |
|
"eval_VitaminC_dot_recall": 0.9760956175298805, |
|
"eval_VitaminC_euclidean_accuracy": 0.5546875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 12.184114456176758, |
|
"eval_VitaminC_euclidean_ap": 0.5517706579195627, |
|
"eval_VitaminC_euclidean_f1": 0.6649006622516557, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.68879508972168, |
|
"eval_VitaminC_euclidean_precision": 0.498015873015873, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 241.22061157226562, |
|
"eval_VitaminC_manhattan_ap": 0.5494156168773414, |
|
"eval_VitaminC_manhattan_f1": 0.6649006622516557, |
|
"eval_VitaminC_manhattan_f1_threshold": 510.2530212402344, |
|
"eval_VitaminC_manhattan_precision": 0.498015873015873, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.560546875, |
|
"eval_VitaminC_max_accuracy_threshold": 312.2774658203125, |
|
"eval_VitaminC_max_ap": 0.5517706579195627, |
|
"eval_VitaminC_max_f1": 0.6684856753069577, |
|
"eval_VitaminC_max_f1_threshold": 510.2530212402344, |
|
"eval_VitaminC_max_precision": 0.508298755186722, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5517706579195627, |
|
"eval_sts-test_pearson_cosine": 0.8812438499723412, |
|
"eval_sts-test_pearson_dot": 0.8695651753004092, |
|
"eval_sts-test_pearson_euclidean": 0.9036940037118162, |
|
"eval_sts-test_pearson_manhattan": 0.9035516699922166, |
|
"eval_sts-test_pearson_max": 0.9036940037118162, |
|
"eval_sts-test_spearman_cosine": 0.9049742835092648, |
|
"eval_sts-test_spearman_dot": 0.8707925987895928, |
|
"eval_sts-test_spearman_euclidean": 0.9003956924537878, |
|
"eval_sts-test_spearman_manhattan": 0.9002747745455083, |
|
"eval_sts-test_spearman_max": 0.9049742835092648, |
|
"eval_vitaminc-pairs_loss": 1.5520410537719727, |
|
"eval_vitaminc-pairs_runtime": 1.8323, |
|
"eval_vitaminc-pairs_samples_per_second": 58.943, |
|
"eval_vitaminc-pairs_steps_per_second": 1.092, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_negation-triplets_loss": 0.9211694002151489, |
|
"eval_negation-triplets_runtime": 0.2923, |
|
"eval_negation-triplets_samples_per_second": 218.93, |
|
"eval_negation-triplets_steps_per_second": 3.421, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_scitail-pairs-pos_loss": 0.07377135753631592, |
|
"eval_scitail-pairs-pos_runtime": 0.3681, |
|
"eval_scitail-pairs-pos_samples_per_second": 146.691, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.716, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_scitail-pairs-qa_loss": 0.00150959100574255, |
|
"eval_scitail-pairs-qa_runtime": 0.5123, |
|
"eval_scitail-pairs-qa_samples_per_second": 249.842, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.904, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_xsum-pairs_loss": 0.036599572747945786, |
|
"eval_xsum-pairs_runtime": 2.7238, |
|
"eval_xsum-pairs_samples_per_second": 46.994, |
|
"eval_xsum-pairs_steps_per_second": 0.734, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_sciq_pairs_loss": 0.01615014858543873, |
|
"eval_sciq_pairs_runtime": 2.8064, |
|
"eval_sciq_pairs_samples_per_second": 45.61, |
|
"eval_sciq_pairs_steps_per_second": 0.713, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_qasc_pairs_loss": 0.09235507994890213, |
|
"eval_qasc_pairs_runtime": 0.6488, |
|
"eval_qasc_pairs_samples_per_second": 197.276, |
|
"eval_qasc_pairs_steps_per_second": 3.082, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_openbookqa_pairs_loss": 0.6891775727272034, |
|
"eval_openbookqa_pairs_runtime": 0.5698, |
|
"eval_openbookqa_pairs_samples_per_second": 224.641, |
|
"eval_openbookqa_pairs_steps_per_second": 3.51, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_msmarco_pairs_loss": 0.16766037046909332, |
|
"eval_msmarco_pairs_runtime": 1.4798, |
|
"eval_msmarco_pairs_samples_per_second": 86.499, |
|
"eval_msmarco_pairs_steps_per_second": 1.352, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_nq_pairs_loss": 0.09737721085548401, |
|
"eval_nq_pairs_runtime": 2.3409, |
|
"eval_nq_pairs_samples_per_second": 54.68, |
|
"eval_nq_pairs_steps_per_second": 0.854, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_trivia_pairs_loss": 0.5458433032035828, |
|
"eval_trivia_pairs_runtime": 3.5771, |
|
"eval_trivia_pairs_samples_per_second": 35.783, |
|
"eval_trivia_pairs_steps_per_second": 0.559, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_gooaq_pairs_loss": 0.3082329332828522, |
|
"eval_gooaq_pairs_runtime": 0.9181, |
|
"eval_gooaq_pairs_samples_per_second": 139.413, |
|
"eval_gooaq_pairs_steps_per_second": 2.178, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30337078651685395, |
|
"eval_paws-pos_loss": 0.02423396334052086, |
|
"eval_paws-pos_runtime": 0.6827, |
|
"eval_paws-pos_samples_per_second": 187.501, |
|
"eval_paws-pos_steps_per_second": 2.93, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3056179775280899, |
|
"grad_norm": 4.549901485443115, |
|
"learning_rate": 2.6741573033707864e-05, |
|
"loss": 0.7648, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.30786516853932583, |
|
"grad_norm": 3.225851535797119, |
|
"learning_rate": 2.693820224719101e-05, |
|
"loss": 0.7554, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.3101123595505618, |
|
"grad_norm": 0.6228423118591309, |
|
"learning_rate": 2.7134831460674154e-05, |
|
"loss": 0.0753, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.31235955056179776, |
|
"grad_norm": 3.12802791595459, |
|
"learning_rate": 2.7331460674157302e-05, |
|
"loss": 0.4987, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3146067415730337, |
|
"grad_norm": 4.1997880935668945, |
|
"learning_rate": 2.7528089887640447e-05, |
|
"loss": 0.8543, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31685393258426964, |
|
"grad_norm": 4.3362860679626465, |
|
"learning_rate": 2.7724719101123595e-05, |
|
"loss": 0.9425, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.31910112359550563, |
|
"grad_norm": 0.5599316954612732, |
|
"learning_rate": 2.7921348314606737e-05, |
|
"loss": 0.0472, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.32134831460674157, |
|
"grad_norm": 3.503603458404541, |
|
"learning_rate": 2.8117977528089885e-05, |
|
"loss": 0.848, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3235955056179775, |
|
"grad_norm": 4.712310314178467, |
|
"learning_rate": 2.831460674157303e-05, |
|
"loss": 0.8946, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3258426966292135, |
|
"grad_norm": 3.1823527812957764, |
|
"learning_rate": 2.851123595505618e-05, |
|
"loss": 0.7841, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32808988764044944, |
|
"grad_norm": 4.423196315765381, |
|
"learning_rate": 2.8707865168539324e-05, |
|
"loss": 0.6653, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3303370786516854, |
|
"grad_norm": 4.137822151184082, |
|
"learning_rate": 2.890449438202247e-05, |
|
"loss": 0.3522, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.3325842696629214, |
|
"grad_norm": 2.997777223587036, |
|
"learning_rate": 2.9101123595505617e-05, |
|
"loss": 0.4853, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3348314606741573, |
|
"grad_norm": 2.89650559425354, |
|
"learning_rate": 2.9297752808988762e-05, |
|
"loss": 0.4726, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.33707865168539325, |
|
"grad_norm": 5.486624717712402, |
|
"learning_rate": 2.9494382022471907e-05, |
|
"loss": 0.8693, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3393258426966292, |
|
"grad_norm": 4.800889015197754, |
|
"learning_rate": 2.9691011235955052e-05, |
|
"loss": 0.8124, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3415730337078652, |
|
"grad_norm": 4.188066005706787, |
|
"learning_rate": 2.98876404494382e-05, |
|
"loss": 0.8206, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3438202247191011, |
|
"grad_norm": 4.340461254119873, |
|
"learning_rate": 3.0084269662921345e-05, |
|
"loss": 0.9406, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.34606741573033706, |
|
"grad_norm": 4.658304214477539, |
|
"learning_rate": 3.028089887640449e-05, |
|
"loss": 0.7944, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.34831460674157305, |
|
"grad_norm": 0.6266987919807434, |
|
"learning_rate": 3.047752808988764e-05, |
|
"loss": 0.0766, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.350561797752809, |
|
"grad_norm": 4.252346515655518, |
|
"learning_rate": 3.067415730337079e-05, |
|
"loss": 0.8609, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.35280898876404493, |
|
"grad_norm": 4.9649658203125, |
|
"learning_rate": 3.087078651685393e-05, |
|
"loss": 1.0533, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.3550561797752809, |
|
"grad_norm": 4.485607624053955, |
|
"learning_rate": 3.106741573033708e-05, |
|
"loss": 0.8396, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.35730337078651686, |
|
"grad_norm": 3.241231918334961, |
|
"learning_rate": 3.126404494382022e-05, |
|
"loss": 0.7865, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.3595505617977528, |
|
"grad_norm": 6.846582889556885, |
|
"learning_rate": 3.146067415730337e-05, |
|
"loss": 2.4616, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36179775280898874, |
|
"grad_norm": 0.5514687895774841, |
|
"learning_rate": 3.165730337078651e-05, |
|
"loss": 0.0556, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.36404494382022473, |
|
"grad_norm": 3.7877562046051025, |
|
"learning_rate": 3.185393258426966e-05, |
|
"loss": 0.3758, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.36629213483146067, |
|
"grad_norm": 5.397939682006836, |
|
"learning_rate": 3.205056179775281e-05, |
|
"loss": 0.9312, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.3685393258426966, |
|
"grad_norm": 4.301459312438965, |
|
"learning_rate": 3.2247191011235954e-05, |
|
"loss": 0.7993, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.3707865168539326, |
|
"grad_norm": 4.49428129196167, |
|
"learning_rate": 3.24438202247191e-05, |
|
"loss": 0.8104, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.37303370786516854, |
|
"grad_norm": 3.2210912704467773, |
|
"learning_rate": 3.2640449438202244e-05, |
|
"loss": 0.8199, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.3752808988764045, |
|
"grad_norm": 5.359859466552734, |
|
"learning_rate": 3.283707865168539e-05, |
|
"loss": 1.0724, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3775280898876405, |
|
"grad_norm": 4.00059700012207, |
|
"learning_rate": 3.3033707865168534e-05, |
|
"loss": 0.3521, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.3797752808988764, |
|
"grad_norm": 4.418768882751465, |
|
"learning_rate": 3.3230337078651685e-05, |
|
"loss": 0.8536, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.38202247191011235, |
|
"grad_norm": 4.15454626083374, |
|
"learning_rate": 3.342696629213483e-05, |
|
"loss": 0.872, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3842696629213483, |
|
"grad_norm": 3.8060054779052734, |
|
"learning_rate": 3.3623595505617975e-05, |
|
"loss": 0.8009, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.3865168539325843, |
|
"grad_norm": 3.584745407104492, |
|
"learning_rate": 3.382022471910112e-05, |
|
"loss": 0.7798, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3887640449438202, |
|
"grad_norm": 4.861410140991211, |
|
"learning_rate": 3.4016853932584265e-05, |
|
"loss": 0.5953, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.39101123595505616, |
|
"grad_norm": 3.983793020248413, |
|
"learning_rate": 3.421348314606741e-05, |
|
"loss": 0.7562, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.39325842696629215, |
|
"grad_norm": 4.841738224029541, |
|
"learning_rate": 3.4410112359550555e-05, |
|
"loss": 0.7227, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3955056179775281, |
|
"grad_norm": 4.787370204925537, |
|
"learning_rate": 3.460674157303371e-05, |
|
"loss": 0.8953, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.39775280898876403, |
|
"grad_norm": 4.337812900543213, |
|
"learning_rate": 3.480337078651685e-05, |
|
"loss": 0.7102, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.9599294662475586, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0667, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.40224719101123596, |
|
"grad_norm": 0.6864398717880249, |
|
"learning_rate": 3.4999863718440846e-05, |
|
"loss": 0.0528, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"grad_norm": 4.738316059112549, |
|
"learning_rate": 3.499945487641664e-05, |
|
"loss": 0.7312, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_VitaminC_cosine_accuracy": 0.556640625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8256886005401611, |
|
"eval_VitaminC_cosine_ap": 0.5557251062538118, |
|
"eval_VitaminC_cosine_f1": 0.6666666666666667, |
|
"eval_VitaminC_cosine_f1_threshold": 0.4391498863697052, |
|
"eval_VitaminC_cosine_precision": 0.5051334702258727, |
|
"eval_VitaminC_cosine_recall": 0.9800796812749004, |
|
"eval_VitaminC_dot_accuracy": 0.556640625, |
|
"eval_VitaminC_dot_accuracy_threshold": 314.2790832519531, |
|
"eval_VitaminC_dot_ap": 0.5397120960874565, |
|
"eval_VitaminC_dot_f1": 0.6684636118598383, |
|
"eval_VitaminC_dot_f1_threshold": 144.02464294433594, |
|
"eval_VitaminC_dot_precision": 0.505091649694501, |
|
"eval_VitaminC_dot_recall": 0.9880478087649402, |
|
"eval_VitaminC_euclidean_accuracy": 0.560546875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.859346389770508, |
|
"eval_VitaminC_euclidean_ap": 0.5582755831276058, |
|
"eval_VitaminC_euclidean_f1": 0.667605633802817, |
|
"eval_VitaminC_euclidean_f1_threshold": 18.874879837036133, |
|
"eval_VitaminC_euclidean_precision": 0.5163398692810458, |
|
"eval_VitaminC_euclidean_recall": 0.9442231075697212, |
|
"eval_VitaminC_manhattan_accuracy": 0.560546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 239.6153564453125, |
|
"eval_VitaminC_manhattan_ap": 0.5569115785564898, |
|
"eval_VitaminC_manhattan_f1": 0.6649006622516557, |
|
"eval_VitaminC_manhattan_f1_threshold": 501.158447265625, |
|
"eval_VitaminC_manhattan_precision": 0.498015873015873, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.560546875, |
|
"eval_VitaminC_max_accuracy_threshold": 314.2790832519531, |
|
"eval_VitaminC_max_ap": 0.5582755831276058, |
|
"eval_VitaminC_max_f1": 0.6684636118598383, |
|
"eval_VitaminC_max_f1_threshold": 501.158447265625, |
|
"eval_VitaminC_max_precision": 0.5163398692810458, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5582755831276058, |
|
"eval_sts-test_pearson_cosine": 0.8825432226222443, |
|
"eval_sts-test_pearson_dot": 0.8720125241659442, |
|
"eval_sts-test_pearson_euclidean": 0.9053801707227738, |
|
"eval_sts-test_pearson_manhattan": 0.9060044572091359, |
|
"eval_sts-test_pearson_max": 0.9060044572091359, |
|
"eval_sts-test_spearman_cosine": 0.9055030196626042, |
|
"eval_sts-test_spearman_dot": 0.8729395405548455, |
|
"eval_sts-test_spearman_euclidean": 0.9013990604854444, |
|
"eval_sts-test_spearman_manhattan": 0.9021052353902007, |
|
"eval_sts-test_spearman_max": 0.9055030196626042, |
|
"eval_vitaminc-pairs_loss": 1.5215541124343872, |
|
"eval_vitaminc-pairs_runtime": 1.8745, |
|
"eval_vitaminc-pairs_samples_per_second": 57.614, |
|
"eval_vitaminc-pairs_steps_per_second": 1.067, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_negation-triplets_loss": 0.9813100099563599, |
|
"eval_negation-triplets_runtime": 0.3009, |
|
"eval_negation-triplets_samples_per_second": 212.73, |
|
"eval_negation-triplets_steps_per_second": 3.324, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_scitail-pairs-pos_loss": 0.09161412715911865, |
|
"eval_scitail-pairs-pos_runtime": 0.3936, |
|
"eval_scitail-pairs-pos_samples_per_second": 137.188, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.541, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_scitail-pairs-qa_loss": 0.0013133077882230282, |
|
"eval_scitail-pairs-qa_runtime": 0.5286, |
|
"eval_scitail-pairs-qa_samples_per_second": 242.147, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.784, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_xsum-pairs_loss": 0.049595557153224945, |
|
"eval_xsum-pairs_runtime": 2.7447, |
|
"eval_xsum-pairs_samples_per_second": 46.636, |
|
"eval_xsum-pairs_steps_per_second": 0.729, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_sciq_pairs_loss": 0.017273178324103355, |
|
"eval_sciq_pairs_runtime": 2.8401, |
|
"eval_sciq_pairs_samples_per_second": 45.069, |
|
"eval_sciq_pairs_steps_per_second": 0.704, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_qasc_pairs_loss": 0.09485691040754318, |
|
"eval_qasc_pairs_runtime": 0.6594, |
|
"eval_qasc_pairs_samples_per_second": 194.113, |
|
"eval_qasc_pairs_steps_per_second": 3.033, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_openbookqa_pairs_loss": 0.7253161072731018, |
|
"eval_openbookqa_pairs_runtime": 0.5801, |
|
"eval_openbookqa_pairs_samples_per_second": 220.633, |
|
"eval_openbookqa_pairs_steps_per_second": 3.447, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_msmarco_pairs_loss": 0.17383378744125366, |
|
"eval_msmarco_pairs_runtime": 1.4824, |
|
"eval_msmarco_pairs_samples_per_second": 86.346, |
|
"eval_msmarco_pairs_steps_per_second": 1.349, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_nq_pairs_loss": 0.10324681550264359, |
|
"eval_nq_pairs_runtime": 2.3542, |
|
"eval_nq_pairs_samples_per_second": 54.372, |
|
"eval_nq_pairs_steps_per_second": 0.85, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_trivia_pairs_loss": 0.5358972549438477, |
|
"eval_trivia_pairs_runtime": 3.5881, |
|
"eval_trivia_pairs_samples_per_second": 35.673, |
|
"eval_trivia_pairs_steps_per_second": 0.557, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_gooaq_pairs_loss": 0.3070329427719116, |
|
"eval_gooaq_pairs_runtime": 0.9009, |
|
"eval_gooaq_pairs_samples_per_second": 142.079, |
|
"eval_gooaq_pairs_steps_per_second": 2.22, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4044943820224719, |
|
"eval_paws-pos_loss": 0.024055125191807747, |
|
"eval_paws-pos_runtime": 0.6792, |
|
"eval_paws-pos_samples_per_second": 188.469, |
|
"eval_paws-pos_steps_per_second": 2.945, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4067415730337079, |
|
"grad_norm": 5.063413143157959, |
|
"learning_rate": 3.4998773481887046e-05, |
|
"loss": 0.7809, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.40898876404494383, |
|
"grad_norm": 4.108719825744629, |
|
"learning_rate": 3.499781954811798e-05, |
|
"loss": 0.8333, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.41123595505617977, |
|
"grad_norm": 4.6362104415893555, |
|
"learning_rate": 3.499659309368139e-05, |
|
"loss": 0.9283, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4134831460674157, |
|
"grad_norm": 4.432968616485596, |
|
"learning_rate": 3.499509414245486e-05, |
|
"loss": 0.7011, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.4157303370786517, |
|
"grad_norm": 4.040768623352051, |
|
"learning_rate": 3.4993322723621164e-05, |
|
"loss": 0.8413, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.41797752808988764, |
|
"grad_norm": 5.797406196594238, |
|
"learning_rate": 3.499127887166769e-05, |
|
"loss": 1.1679, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4202247191011236, |
|
"grad_norm": 4.275143623352051, |
|
"learning_rate": 3.498896262638578e-05, |
|
"loss": 0.8701, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.42247191011235957, |
|
"grad_norm": 3.920672655105591, |
|
"learning_rate": 3.498637403286993e-05, |
|
"loss": 0.8139, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4247191011235955, |
|
"grad_norm": 4.049210071563721, |
|
"learning_rate": 3.498351314151693e-05, |
|
"loss": 0.664, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.42696629213483145, |
|
"grad_norm": 4.007586479187012, |
|
"learning_rate": 3.498038000802489e-05, |
|
"loss": 0.3835, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42921348314606744, |
|
"grad_norm": 3.7303507328033447, |
|
"learning_rate": 3.497697469339215e-05, |
|
"loss": 0.8516, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4314606741573034, |
|
"grad_norm": 2.96820330619812, |
|
"learning_rate": 3.497329726391606e-05, |
|
"loss": 0.5479, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.4337078651685393, |
|
"grad_norm": 5.242271423339844, |
|
"learning_rate": 3.496934779119175e-05, |
|
"loss": 0.8642, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.43595505617977526, |
|
"grad_norm": 2.740006685256958, |
|
"learning_rate": 3.496512635211069e-05, |
|
"loss": 0.3121, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.43820224719101125, |
|
"grad_norm": 4.162242889404297, |
|
"learning_rate": 3.496063302885921e-05, |
|
"loss": 0.6932, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4404494382022472, |
|
"grad_norm": 0.632938027381897, |
|
"learning_rate": 3.495586790891689e-05, |
|
"loss": 0.0647, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.44269662921348313, |
|
"grad_norm": 4.595058917999268, |
|
"learning_rate": 3.495083108505487e-05, |
|
"loss": 0.8173, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.4449438202247191, |
|
"grad_norm": 3.102372646331787, |
|
"learning_rate": 3.494552265533404e-05, |
|
"loss": 0.3122, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.44719101123595506, |
|
"grad_norm": 4.9895830154418945, |
|
"learning_rate": 3.493994272310313e-05, |
|
"loss": 0.7852, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.449438202247191, |
|
"grad_norm": 4.032258987426758, |
|
"learning_rate": 3.493409139699669e-05, |
|
"loss": 0.811, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.451685393258427, |
|
"grad_norm": 4.17324161529541, |
|
"learning_rate": 3.4927968790932973e-05, |
|
"loss": 0.7564, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.45393258426966293, |
|
"grad_norm": 0.49707159399986267, |
|
"learning_rate": 3.492157502411174e-05, |
|
"loss": 0.0541, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.45617977528089887, |
|
"grad_norm": 3.847059965133667, |
|
"learning_rate": 3.491491022101194e-05, |
|
"loss": 0.9085, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.4584269662921348, |
|
"grad_norm": 4.565647602081299, |
|
"learning_rate": 3.4907974511389224e-05, |
|
"loss": 0.8416, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.4606741573033708, |
|
"grad_norm": 0.8872150778770447, |
|
"learning_rate": 3.4900768030273515e-05, |
|
"loss": 0.0569, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.46292134831460674, |
|
"grad_norm": 3.2797999382019043, |
|
"learning_rate": 3.4893290917966305e-05, |
|
"loss": 0.7998, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4651685393258427, |
|
"grad_norm": 5.683195114135742, |
|
"learning_rate": 3.4885543320037956e-05, |
|
"loss": 0.7218, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.46741573033707867, |
|
"grad_norm": 5.348382949829102, |
|
"learning_rate": 3.4877525387324844e-05, |
|
"loss": 0.9292, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4696629213483146, |
|
"grad_norm": 4.3047099113464355, |
|
"learning_rate": 3.486923727592647e-05, |
|
"loss": 0.8279, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.47191011235955055, |
|
"grad_norm": 4.425166130065918, |
|
"learning_rate": 3.486067914720236e-05, |
|
"loss": 0.8452, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.47415730337078654, |
|
"grad_norm": 5.7947916984558105, |
|
"learning_rate": 3.485185116776896e-05, |
|
"loss": 1.1099, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.4764044943820225, |
|
"grad_norm": 4.257087230682373, |
|
"learning_rate": 3.4842753509496385e-05, |
|
"loss": 0.9436, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.4786516853932584, |
|
"grad_norm": 4.357375144958496, |
|
"learning_rate": 3.483338634950507e-05, |
|
"loss": 0.8389, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.48089887640449436, |
|
"grad_norm": 3.666268825531006, |
|
"learning_rate": 3.482374987016233e-05, |
|
"loss": 0.3297, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.48314606741573035, |
|
"grad_norm": 3.0593607425689697, |
|
"learning_rate": 3.481384425907879e-05, |
|
"loss": 0.8098, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4853932584269663, |
|
"grad_norm": 0.4539957344532013, |
|
"learning_rate": 3.480366970910476e-05, |
|
"loss": 0.0386, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.48764044943820223, |
|
"grad_norm": 3.3102784156799316, |
|
"learning_rate": 3.479322641832646e-05, |
|
"loss": 0.7752, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.4898876404494382, |
|
"grad_norm": 3.8798298835754395, |
|
"learning_rate": 3.4782514590062165e-05, |
|
"loss": 0.8071, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.49213483146067416, |
|
"grad_norm": 6.300197124481201, |
|
"learning_rate": 3.4771534432858255e-05, |
|
"loss": 2.571, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.4943820224719101, |
|
"grad_norm": 4.163381099700928, |
|
"learning_rate": 3.4760286160485145e-05, |
|
"loss": 0.5912, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4966292134831461, |
|
"grad_norm": 3.5834686756134033, |
|
"learning_rate": 3.474876999193314e-05, |
|
"loss": 0.3792, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.49887640449438203, |
|
"grad_norm": 4.494593143463135, |
|
"learning_rate": 3.473698615140816e-05, |
|
"loss": 0.7456, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.501123595505618, |
|
"grad_norm": 3.909142017364502, |
|
"learning_rate": 3.4724934868327366e-05, |
|
"loss": 0.7207, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.503370786516854, |
|
"grad_norm": 3.0387282371520996, |
|
"learning_rate": 3.47126163773147e-05, |
|
"loss": 0.3254, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"grad_norm": 0.6529088616371155, |
|
"learning_rate": 3.4700030918196344e-05, |
|
"loss": 0.0461, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_VitaminC_cosine_accuracy": 0.556640625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8303268551826477, |
|
"eval_VitaminC_cosine_ap": 0.5509523400010791, |
|
"eval_VitaminC_cosine_f1": 0.6657824933687002, |
|
"eval_VitaminC_cosine_f1_threshold": 0.2634955048561096, |
|
"eval_VitaminC_cosine_precision": 0.4990059642147117, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.55078125, |
|
"eval_VitaminC_dot_accuracy_threshold": 281.011474609375, |
|
"eval_VitaminC_dot_ap": 0.5281394234221073, |
|
"eval_VitaminC_dot_f1": 0.6711772665764546, |
|
"eval_VitaminC_dot_f1_threshold": 141.11529541015625, |
|
"eval_VitaminC_dot_precision": 0.5081967213114754, |
|
"eval_VitaminC_dot_recall": 0.9880478087649402, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 11.405111312866211, |
|
"eval_VitaminC_euclidean_ap": 0.5573376843815556, |
|
"eval_VitaminC_euclidean_f1": 0.6640211640211641, |
|
"eval_VitaminC_euclidean_f1_threshold": 24.63976287841797, |
|
"eval_VitaminC_euclidean_precision": 0.497029702970297, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.556640625, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 349.33441162109375, |
|
"eval_VitaminC_manhattan_ap": 0.5561637270496671, |
|
"eval_VitaminC_manhattan_f1": 0.6657824933687002, |
|
"eval_VitaminC_manhattan_f1_threshold": 505.0340270996094, |
|
"eval_VitaminC_manhattan_precision": 0.4990059642147117, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.556640625, |
|
"eval_VitaminC_max_accuracy_threshold": 349.33441162109375, |
|
"eval_VitaminC_max_ap": 0.5573376843815556, |
|
"eval_VitaminC_max_f1": 0.6711772665764546, |
|
"eval_VitaminC_max_f1_threshold": 505.0340270996094, |
|
"eval_VitaminC_max_precision": 0.5081967213114754, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5573376843815556, |
|
"eval_sts-test_pearson_cosine": 0.8848200869109313, |
|
"eval_sts-test_pearson_dot": 0.8723563516714744, |
|
"eval_sts-test_pearson_euclidean": 0.9070688973489409, |
|
"eval_sts-test_pearson_manhattan": 0.9073961699007848, |
|
"eval_sts-test_pearson_max": 0.9073961699007848, |
|
"eval_sts-test_spearman_cosine": 0.9050875937031079, |
|
"eval_sts-test_spearman_dot": 0.8699468894518183, |
|
"eval_sts-test_spearman_euclidean": 0.9020747597811932, |
|
"eval_sts-test_spearman_manhattan": 0.9019608230696907, |
|
"eval_sts-test_spearman_max": 0.9050875937031079, |
|
"eval_vitaminc-pairs_loss": 1.4897230863571167, |
|
"eval_vitaminc-pairs_runtime": 1.8927, |
|
"eval_vitaminc-pairs_samples_per_second": 57.062, |
|
"eval_vitaminc-pairs_steps_per_second": 1.057, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_negation-triplets_loss": 0.9457363486289978, |
|
"eval_negation-triplets_runtime": 0.3019, |
|
"eval_negation-triplets_samples_per_second": 212.002, |
|
"eval_negation-triplets_steps_per_second": 3.313, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_scitail-pairs-pos_loss": 0.07606112211942673, |
|
"eval_scitail-pairs-pos_runtime": 0.3972, |
|
"eval_scitail-pairs-pos_samples_per_second": 135.938, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.517, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_scitail-pairs-qa_loss": 0.001212431932799518, |
|
"eval_scitail-pairs-qa_runtime": 0.5348, |
|
"eval_scitail-pairs-qa_samples_per_second": 239.347, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.74, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_xsum-pairs_loss": 0.02758924476802349, |
|
"eval_xsum-pairs_runtime": 2.767, |
|
"eval_xsum-pairs_samples_per_second": 46.26, |
|
"eval_xsum-pairs_steps_per_second": 0.723, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_sciq_pairs_loss": 0.016450434923171997, |
|
"eval_sciq_pairs_runtime": 2.8812, |
|
"eval_sciq_pairs_samples_per_second": 44.426, |
|
"eval_sciq_pairs_steps_per_second": 0.694, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_qasc_pairs_loss": 0.09214109182357788, |
|
"eval_qasc_pairs_runtime": 0.6597, |
|
"eval_qasc_pairs_samples_per_second": 194.029, |
|
"eval_qasc_pairs_steps_per_second": 3.032, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_openbookqa_pairs_loss": 0.7429620623588562, |
|
"eval_openbookqa_pairs_runtime": 0.5947, |
|
"eval_openbookqa_pairs_samples_per_second": 215.22, |
|
"eval_openbookqa_pairs_steps_per_second": 3.363, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_msmarco_pairs_loss": 0.17871831357479095, |
|
"eval_msmarco_pairs_runtime": 1.5003, |
|
"eval_msmarco_pairs_samples_per_second": 85.314, |
|
"eval_msmarco_pairs_steps_per_second": 1.333, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_nq_pairs_loss": 0.09803248196840286, |
|
"eval_nq_pairs_runtime": 2.3587, |
|
"eval_nq_pairs_samples_per_second": 54.267, |
|
"eval_nq_pairs_steps_per_second": 0.848, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_trivia_pairs_loss": 0.5323590636253357, |
|
"eval_trivia_pairs_runtime": 3.6206, |
|
"eval_trivia_pairs_samples_per_second": 35.354, |
|
"eval_trivia_pairs_steps_per_second": 0.552, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_gooaq_pairs_loss": 0.2667708098888397, |
|
"eval_gooaq_pairs_runtime": 0.9171, |
|
"eval_gooaq_pairs_samples_per_second": 139.573, |
|
"eval_gooaq_pairs_steps_per_second": 2.181, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5056179775280899, |
|
"eval_paws-pos_loss": 0.0236118845641613, |
|
"eval_paws-pos_runtime": 0.6973, |
|
"eval_paws-pos_samples_per_second": 183.563, |
|
"eval_paws-pos_steps_per_second": 2.868, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5078651685393258, |
|
"grad_norm": 3.5867371559143066, |
|
"learning_rate": 3.4687178735995997e-05, |
|
"loss": 0.347, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5101123595505618, |
|
"grad_norm": 0.37994861602783203, |
|
"learning_rate": 3.467406008093016e-05, |
|
"loss": 0.0417, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5123595505617977, |
|
"grad_norm": 4.081336975097656, |
|
"learning_rate": 3.466067520840322e-05, |
|
"loss": 0.7783, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5146067415730337, |
|
"grad_norm": 4.306976795196533, |
|
"learning_rate": 3.46470243790025e-05, |
|
"loss": 0.9027, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5168539325842697, |
|
"grad_norm": 4.0280022621154785, |
|
"learning_rate": 3.4633107858493206e-05, |
|
"loss": 0.7166, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5191011235955056, |
|
"grad_norm": 3.4807679653167725, |
|
"learning_rate": 3.461892591781319e-05, |
|
"loss": 0.705, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5213483146067416, |
|
"grad_norm": 4.166563510894775, |
|
"learning_rate": 3.4604478833067756e-05, |
|
"loss": 0.8425, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.5235955056179775, |
|
"grad_norm": 3.828537940979004, |
|
"learning_rate": 3.4589766885524204e-05, |
|
"loss": 0.5362, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.5258426966292135, |
|
"grad_norm": 4.316190242767334, |
|
"learning_rate": 3.4574790361606435e-05, |
|
"loss": 0.7869, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.5280898876404494, |
|
"grad_norm": 4.244805335998535, |
|
"learning_rate": 3.4559549552889285e-05, |
|
"loss": 0.88, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5303370786516854, |
|
"grad_norm": 4.208700656890869, |
|
"learning_rate": 3.454404475609294e-05, |
|
"loss": 0.8077, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.5325842696629214, |
|
"grad_norm": 3.1473183631896973, |
|
"learning_rate": 3.4528276273077094e-05, |
|
"loss": 0.8145, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.5348314606741573, |
|
"grad_norm": 3.798297166824341, |
|
"learning_rate": 3.4512244410835094e-05, |
|
"loss": 0.78, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.5370786516853933, |
|
"grad_norm": 0.535529375076294, |
|
"learning_rate": 3.449594948148796e-05, |
|
"loss": 0.0536, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.5393258426966292, |
|
"grad_norm": 3.2119970321655273, |
|
"learning_rate": 3.447939180227833e-05, |
|
"loss": 0.7975, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5415730337078651, |
|
"grad_norm": 4.725860118865967, |
|
"learning_rate": 3.446257169556425e-05, |
|
"loss": 0.8932, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.5438202247191011, |
|
"grad_norm": 3.867676258087158, |
|
"learning_rate": 3.4445489488812906e-05, |
|
"loss": 0.3386, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.5460674157303371, |
|
"grad_norm": 3.981114387512207, |
|
"learning_rate": 3.4428145514594274e-05, |
|
"loss": 0.7741, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.5483146067415731, |
|
"grad_norm": 4.034990310668945, |
|
"learning_rate": 3.4410540110574616e-05, |
|
"loss": 0.7439, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.550561797752809, |
|
"grad_norm": 4.209812641143799, |
|
"learning_rate": 3.4392673619509916e-05, |
|
"loss": 0.7999, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5528089887640449, |
|
"grad_norm": 3.942631244659424, |
|
"learning_rate": 3.437454638923921e-05, |
|
"loss": 0.8542, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.5550561797752809, |
|
"grad_norm": 4.087955951690674, |
|
"learning_rate": 3.435615877267783e-05, |
|
"loss": 0.6992, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.5573033707865168, |
|
"grad_norm": 3.885822057723999, |
|
"learning_rate": 3.4337511127810466e-05, |
|
"loss": 0.8579, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.5595505617977528, |
|
"grad_norm": 5.198770523071289, |
|
"learning_rate": 3.431860381768431e-05, |
|
"loss": 1.0221, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5617977528089888, |
|
"grad_norm": 4.321418285369873, |
|
"learning_rate": 3.4299437210401866e-05, |
|
"loss": 0.699, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5640449438202247, |
|
"grad_norm": 3.1992154121398926, |
|
"learning_rate": 3.4280011679113884e-05, |
|
"loss": 0.8523, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.5662921348314607, |
|
"grad_norm": 4.94226598739624, |
|
"learning_rate": 3.4260327602012027e-05, |
|
"loss": 1.0307, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.5685393258426966, |
|
"grad_norm": 3.958935499191284, |
|
"learning_rate": 3.424038536232154e-05, |
|
"loss": 0.846, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.5707865168539326, |
|
"grad_norm": 4.023487091064453, |
|
"learning_rate": 3.4220185348293775e-05, |
|
"loss": 0.8361, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.5730337078651685, |
|
"grad_norm": 3.275102138519287, |
|
"learning_rate": 3.4199727953198665e-05, |
|
"loss": 0.8224, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5752808988764045, |
|
"grad_norm": 3.6130261421203613, |
|
"learning_rate": 3.417901357531701e-05, |
|
"loss": 0.5301, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.5775280898876405, |
|
"grad_norm": 4.571770668029785, |
|
"learning_rate": 3.415804261793277e-05, |
|
"loss": 0.3795, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5797752808988764, |
|
"grad_norm": 3.1884663105010986, |
|
"learning_rate": 3.413681548932521e-05, |
|
"loss": 0.5434, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5820224719101124, |
|
"grad_norm": 4.795211315155029, |
|
"learning_rate": 3.411533260276091e-05, |
|
"loss": 0.847, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5842696629213483, |
|
"grad_norm": 4.761318206787109, |
|
"learning_rate": 3.409359437648579e-05, |
|
"loss": 0.7323, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5865168539325842, |
|
"grad_norm": 4.4683098793029785, |
|
"learning_rate": 3.407160123371687e-05, |
|
"loss": 0.6606, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.5887640449438202, |
|
"grad_norm": 0.7677178382873535, |
|
"learning_rate": 3.404935360263415e-05, |
|
"loss": 0.0543, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5910112359550562, |
|
"grad_norm": 4.110381126403809, |
|
"learning_rate": 3.4026851916372166e-05, |
|
"loss": 0.6709, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5932584269662922, |
|
"grad_norm": 4.766375541687012, |
|
"learning_rate": 3.400409661301162e-05, |
|
"loss": 0.809, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5955056179775281, |
|
"grad_norm": 5.389264106750488, |
|
"learning_rate": 3.398108813557082e-05, |
|
"loss": 1.0391, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.597752808988764, |
|
"grad_norm": 3.8780810832977295, |
|
"learning_rate": 3.3957826931997094e-05, |
|
"loss": 0.7396, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 4.399974822998047, |
|
"learning_rate": 3.393431345515801e-05, |
|
"loss": 0.7839, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6022471910112359, |
|
"grad_norm": 3.2098612785339355, |
|
"learning_rate": 3.391054816283262e-05, |
|
"loss": 0.3054, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.604494382022472, |
|
"grad_norm": 3.606182098388672, |
|
"learning_rate": 3.3886531517702505e-05, |
|
"loss": 0.5258, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"grad_norm": 4.3564934730529785, |
|
"learning_rate": 3.3862263987342784e-05, |
|
"loss": 0.7367, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_VitaminC_cosine_accuracy": 0.552734375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.814909815788269, |
|
"eval_VitaminC_cosine_ap": 0.5506214433093293, |
|
"eval_VitaminC_cosine_f1": 0.664886515353805, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3506072461605072, |
|
"eval_VitaminC_cosine_precision": 0.5, |
|
"eval_VitaminC_cosine_recall": 0.9920318725099602, |
|
"eval_VitaminC_dot_accuracy": 0.55078125, |
|
"eval_VitaminC_dot_accuracy_threshold": 316.90899658203125, |
|
"eval_VitaminC_dot_ap": 0.5353657977329522, |
|
"eval_VitaminC_dot_f1": 0.6666666666666667, |
|
"eval_VitaminC_dot_f1_threshold": 155.67796325683594, |
|
"eval_VitaminC_dot_precision": 0.506198347107438, |
|
"eval_VitaminC_dot_recall": 0.9760956175298805, |
|
"eval_VitaminC_euclidean_accuracy": 0.55078125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 10.77621841430664, |
|
"eval_VitaminC_euclidean_ap": 0.550546292530568, |
|
"eval_VitaminC_euclidean_f1": 0.6666666666666666, |
|
"eval_VitaminC_euclidean_f1_threshold": 24.22284698486328, |
|
"eval_VitaminC_euclidean_precision": 0.5, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 335.6986389160156, |
|
"eval_VitaminC_manhattan_ap": 0.5497325043939846, |
|
"eval_VitaminC_manhattan_f1": 0.6640211640211641, |
|
"eval_VitaminC_manhattan_f1_threshold": 513.494873046875, |
|
"eval_VitaminC_manhattan_precision": 0.497029702970297, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.5546875, |
|
"eval_VitaminC_max_accuracy_threshold": 335.6986389160156, |
|
"eval_VitaminC_max_ap": 0.5506214433093293, |
|
"eval_VitaminC_max_f1": 0.6666666666666667, |
|
"eval_VitaminC_max_f1_threshold": 513.494873046875, |
|
"eval_VitaminC_max_precision": 0.506198347107438, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5506214433093293, |
|
"eval_sts-test_pearson_cosine": 0.8848372816940555, |
|
"eval_sts-test_pearson_dot": 0.8774995772730847, |
|
"eval_sts-test_pearson_euclidean": 0.9058906663416005, |
|
"eval_sts-test_pearson_manhattan": 0.9066316554236529, |
|
"eval_sts-test_pearson_max": 0.9066316554236529, |
|
"eval_sts-test_spearman_cosine": 0.9085018016884417, |
|
"eval_sts-test_spearman_dot": 0.8776881864036095, |
|
"eval_sts-test_spearman_euclidean": 0.903223569412372, |
|
"eval_sts-test_spearman_manhattan": 0.9037578547221237, |
|
"eval_sts-test_spearman_max": 0.9085018016884417, |
|
"eval_vitaminc-pairs_loss": 1.4935871362686157, |
|
"eval_vitaminc-pairs_runtime": 1.8963, |
|
"eval_vitaminc-pairs_samples_per_second": 56.952, |
|
"eval_vitaminc-pairs_steps_per_second": 1.055, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_negation-triplets_loss": 0.9505463242530823, |
|
"eval_negation-triplets_runtime": 0.3041, |
|
"eval_negation-triplets_samples_per_second": 210.485, |
|
"eval_negation-triplets_steps_per_second": 3.289, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_scitail-pairs-pos_loss": 0.09635873883962631, |
|
"eval_scitail-pairs-pos_runtime": 0.4048, |
|
"eval_scitail-pairs-pos_samples_per_second": 133.396, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.47, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_scitail-pairs-qa_loss": 0.0009468490607105196, |
|
"eval_scitail-pairs-qa_runtime": 0.5341, |
|
"eval_scitail-pairs-qa_samples_per_second": 239.65, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.745, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_xsum-pairs_loss": 0.026903513818979263, |
|
"eval_xsum-pairs_runtime": 2.7518, |
|
"eval_xsum-pairs_samples_per_second": 46.514, |
|
"eval_xsum-pairs_steps_per_second": 0.727, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_sciq_pairs_loss": 0.01619444414973259, |
|
"eval_sciq_pairs_runtime": 2.8856, |
|
"eval_sciq_pairs_samples_per_second": 44.358, |
|
"eval_sciq_pairs_steps_per_second": 0.693, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_qasc_pairs_loss": 0.09130185097455978, |
|
"eval_qasc_pairs_runtime": 0.6645, |
|
"eval_qasc_pairs_samples_per_second": 192.631, |
|
"eval_qasc_pairs_steps_per_second": 3.01, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_openbookqa_pairs_loss": 0.7336423397064209, |
|
"eval_openbookqa_pairs_runtime": 0.5935, |
|
"eval_openbookqa_pairs_samples_per_second": 215.687, |
|
"eval_openbookqa_pairs_steps_per_second": 3.37, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_msmarco_pairs_loss": 0.15868164598941803, |
|
"eval_msmarco_pairs_runtime": 1.5086, |
|
"eval_msmarco_pairs_samples_per_second": 84.844, |
|
"eval_msmarco_pairs_steps_per_second": 1.326, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_nq_pairs_loss": 0.10780799388885498, |
|
"eval_nq_pairs_runtime": 2.3746, |
|
"eval_nq_pairs_samples_per_second": 53.905, |
|
"eval_nq_pairs_steps_per_second": 0.842, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_trivia_pairs_loss": 0.49691149592399597, |
|
"eval_trivia_pairs_runtime": 3.5992, |
|
"eval_trivia_pairs_samples_per_second": 35.563, |
|
"eval_trivia_pairs_steps_per_second": 0.556, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_gooaq_pairs_loss": 0.3025541603565216, |
|
"eval_gooaq_pairs_runtime": 0.9181, |
|
"eval_gooaq_pairs_samples_per_second": 139.423, |
|
"eval_gooaq_pairs_steps_per_second": 2.178, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6067415730337079, |
|
"eval_paws-pos_loss": 0.024440350010991096, |
|
"eval_paws-pos_runtime": 0.7046, |
|
"eval_paws-pos_samples_per_second": 181.67, |
|
"eval_paws-pos_steps_per_second": 2.839, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6089887640449438, |
|
"grad_norm": 3.21183705329895, |
|
"learning_rate": 3.383774604421301e-05, |
|
"loss": 0.747, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.6112359550561798, |
|
"grad_norm": 4.403411865234375, |
|
"learning_rate": 3.3812978165647975e-05, |
|
"loss": 0.7855, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.6134831460674157, |
|
"grad_norm": 0.46612274646759033, |
|
"learning_rate": 3.3787960833848405e-05, |
|
"loss": 0.0473, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.6157303370786517, |
|
"grad_norm": 3.30610990524292, |
|
"learning_rate": 3.3762694535871584e-05, |
|
"loss": 0.4378, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.6179775280898876, |
|
"grad_norm": 3.7408640384674072, |
|
"learning_rate": 3.373717976362187e-05, |
|
"loss": 0.8767, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6202247191011236, |
|
"grad_norm": 5.345012187957764, |
|
"learning_rate": 3.3711417013841105e-05, |
|
"loss": 1.0345, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.6224719101123596, |
|
"grad_norm": 3.518765449523926, |
|
"learning_rate": 3.368540678809897e-05, |
|
"loss": 0.5182, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.6247191011235955, |
|
"grad_norm": 6.666887283325195, |
|
"learning_rate": 3.3659149592783186e-05, |
|
"loss": 2.5949, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.6269662921348315, |
|
"grad_norm": 3.197411298751831, |
|
"learning_rate": 3.363264593908969e-05, |
|
"loss": 0.833, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.6292134831460674, |
|
"grad_norm": 0.6012090444564819, |
|
"learning_rate": 3.360589634301267e-05, |
|
"loss": 0.0778, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6314606741573033, |
|
"grad_norm": 4.5016188621521, |
|
"learning_rate": 3.357890132533449e-05, |
|
"loss": 0.8048, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.6337078651685393, |
|
"grad_norm": 3.865889072418213, |
|
"learning_rate": 3.35516614116156e-05, |
|
"loss": 0.7524, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.6359550561797753, |
|
"grad_norm": 3.2998361587524414, |
|
"learning_rate": 3.3524177132184266e-05, |
|
"loss": 0.3246, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.6382022471910113, |
|
"grad_norm": 0.6418587565422058, |
|
"learning_rate": 3.349644902212628e-05, |
|
"loss": 0.0728, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.6404494382022472, |
|
"grad_norm": 5.772351264953613, |
|
"learning_rate": 3.34684776212745e-05, |
|
"loss": 2.3619, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6426966292134831, |
|
"grad_norm": 3.769488573074341, |
|
"learning_rate": 3.3440263474198376e-05, |
|
"loss": 0.7464, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6449438202247191, |
|
"grad_norm": 4.559601783752441, |
|
"learning_rate": 3.3411807130193325e-05, |
|
"loss": 0.6691, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.647191011235955, |
|
"grad_norm": 0.45337462425231934, |
|
"learning_rate": 3.338310914327005e-05, |
|
"loss": 0.059, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.6494382022471911, |
|
"grad_norm": 4.7184553146362305, |
|
"learning_rate": 3.3354170072143766e-05, |
|
"loss": 0.7841, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.651685393258427, |
|
"grad_norm": 3.886216640472412, |
|
"learning_rate": 3.332499048022328e-05, |
|
"loss": 0.647, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6539325842696629, |
|
"grad_norm": 4.497567176818848, |
|
"learning_rate": 3.329557093560006e-05, |
|
"loss": 0.8814, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.6561797752808989, |
|
"grad_norm": 3.995391368865967, |
|
"learning_rate": 3.326591201103716e-05, |
|
"loss": 0.7247, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.6584269662921348, |
|
"grad_norm": 0.4348815083503723, |
|
"learning_rate": 3.323601428395809e-05, |
|
"loss": 0.059, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.6606741573033708, |
|
"grad_norm": 3.6197896003723145, |
|
"learning_rate": 3.320587833643554e-05, |
|
"loss": 0.8317, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.6629213483146067, |
|
"grad_norm": 4.4088215827941895, |
|
"learning_rate": 3.317550475518006e-05, |
|
"loss": 0.8548, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6651685393258427, |
|
"grad_norm": 4.541014194488525, |
|
"learning_rate": 3.314489413152867e-05, |
|
"loss": 0.9213, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.6674157303370787, |
|
"grad_norm": 3.067857265472412, |
|
"learning_rate": 3.311404706143329e-05, |
|
"loss": 0.6923, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.6696629213483146, |
|
"grad_norm": 4.037753582000732, |
|
"learning_rate": 3.3082964145449174e-05, |
|
"loss": 0.7777, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.6719101123595506, |
|
"grad_norm": 4.280182838439941, |
|
"learning_rate": 3.305164598872322e-05, |
|
"loss": 0.7496, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.6741573033707865, |
|
"grad_norm": 4.357325077056885, |
|
"learning_rate": 3.302009320098218e-05, |
|
"loss": 0.7636, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6764044943820224, |
|
"grad_norm": 4.007940292358398, |
|
"learning_rate": 3.2988306396520775e-05, |
|
"loss": 0.6867, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.6786516853932584, |
|
"grad_norm": 0.8544747233390808, |
|
"learning_rate": 3.295628619418977e-05, |
|
"loss": 0.0506, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.6808988764044944, |
|
"grad_norm": 3.34498929977417, |
|
"learning_rate": 3.292403321738387e-05, |
|
"loss": 0.3346, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.6831460674157304, |
|
"grad_norm": 2.441420316696167, |
|
"learning_rate": 3.289154809402967e-05, |
|
"loss": 0.2485, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.6853932584269663, |
|
"grad_norm": 4.533839702606201, |
|
"learning_rate": 3.285883145657334e-05, |
|
"loss": 0.8508, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6876404494382022, |
|
"grad_norm": 3.2033944129943848, |
|
"learning_rate": 3.2825883941968346e-05, |
|
"loss": 0.8464, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6898876404494382, |
|
"grad_norm": 3.6305220127105713, |
|
"learning_rate": 3.279270619166309e-05, |
|
"loss": 0.3385, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6921348314606741, |
|
"grad_norm": 4.438405990600586, |
|
"learning_rate": 3.2759298851588336e-05, |
|
"loss": 0.8837, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6943820224719102, |
|
"grad_norm": 4.252586841583252, |
|
"learning_rate": 3.272566257214474e-05, |
|
"loss": 0.9019, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6966292134831461, |
|
"grad_norm": 4.231752872467041, |
|
"learning_rate": 3.2691798008190096e-05, |
|
"loss": 0.6922, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.698876404494382, |
|
"grad_norm": 3.862682342529297, |
|
"learning_rate": 3.265770581902662e-05, |
|
"loss": 0.6348, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.701123595505618, |
|
"grad_norm": 3.783026933670044, |
|
"learning_rate": 3.262338666838813e-05, |
|
"loss": 0.7522, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.7033707865168539, |
|
"grad_norm": 4.141933917999268, |
|
"learning_rate": 3.25888412244271e-05, |
|
"loss": 0.7843, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.7056179775280899, |
|
"grad_norm": 0.7638006210327148, |
|
"learning_rate": 3.2554070159701684e-05, |
|
"loss": 0.0493, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"grad_norm": 3.7285079956054688, |
|
"learning_rate": 3.2519074151162564e-05, |
|
"loss": 0.357, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_VitaminC_cosine_accuracy": 0.556640625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8318675756454468, |
|
"eval_VitaminC_cosine_ap": 0.553255462027648, |
|
"eval_VitaminC_cosine_f1": 0.6666666666666666, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3080925941467285, |
|
"eval_VitaminC_cosine_precision": 0.5, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 284.4936218261719, |
|
"eval_VitaminC_dot_ap": 0.5335304755231123, |
|
"eval_VitaminC_dot_f1": 0.6675531914893617, |
|
"eval_VitaminC_dot_f1_threshold": 117.11366271972656, |
|
"eval_VitaminC_dot_precision": 0.500998003992016, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 14.916669845581055, |
|
"eval_VitaminC_euclidean_ap": 0.5560392780320775, |
|
"eval_VitaminC_euclidean_f1": 0.6657824933687002, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.758323669433594, |
|
"eval_VitaminC_euclidean_precision": 0.4990059642147117, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 303.769775390625, |
|
"eval_VitaminC_manhattan_ap": 0.5575735035337728, |
|
"eval_VitaminC_manhattan_f1": 0.6666666666666666, |
|
"eval_VitaminC_manhattan_f1_threshold": 500.6726989746094, |
|
"eval_VitaminC_manhattan_precision": 0.5, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.556640625, |
|
"eval_VitaminC_max_accuracy_threshold": 303.769775390625, |
|
"eval_VitaminC_max_ap": 0.5575735035337728, |
|
"eval_VitaminC_max_f1": 0.6675531914893617, |
|
"eval_VitaminC_max_f1_threshold": 500.6726989746094, |
|
"eval_VitaminC_max_precision": 0.500998003992016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5575735035337728, |
|
"eval_sts-test_pearson_cosine": 0.884017793393225, |
|
"eval_sts-test_pearson_dot": 0.8725802033594147, |
|
"eval_sts-test_pearson_euclidean": 0.9065592531799239, |
|
"eval_sts-test_pearson_manhattan": 0.9070236641674441, |
|
"eval_sts-test_pearson_max": 0.9070236641674441, |
|
"eval_sts-test_spearman_cosine": 0.9067846957888538, |
|
"eval_sts-test_spearman_dot": 0.8716365180769119, |
|
"eval_sts-test_spearman_euclidean": 0.9026938039800204, |
|
"eval_sts-test_spearman_manhattan": 0.903306941012344, |
|
"eval_sts-test_spearman_max": 0.9067846957888538, |
|
"eval_vitaminc-pairs_loss": 1.4885247945785522, |
|
"eval_vitaminc-pairs_runtime": 1.9137, |
|
"eval_vitaminc-pairs_samples_per_second": 56.436, |
|
"eval_vitaminc-pairs_steps_per_second": 1.045, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_negation-triplets_loss": 0.9597576856613159, |
|
"eval_negation-triplets_runtime": 0.3023, |
|
"eval_negation-triplets_samples_per_second": 211.742, |
|
"eval_negation-triplets_steps_per_second": 3.308, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_scitail-pairs-pos_loss": 0.09951130300760269, |
|
"eval_scitail-pairs-pos_runtime": 0.3896, |
|
"eval_scitail-pairs-pos_samples_per_second": 138.608, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.567, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_scitail-pairs-qa_loss": 0.0010157548822462559, |
|
"eval_scitail-pairs-qa_runtime": 0.5373, |
|
"eval_scitail-pairs-qa_samples_per_second": 238.245, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.723, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_xsum-pairs_loss": 0.027823584154248238, |
|
"eval_xsum-pairs_runtime": 2.7408, |
|
"eval_xsum-pairs_samples_per_second": 46.701, |
|
"eval_xsum-pairs_steps_per_second": 0.73, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_sciq_pairs_loss": 0.015241424553096294, |
|
"eval_sciq_pairs_runtime": 2.8458, |
|
"eval_sciq_pairs_samples_per_second": 44.978, |
|
"eval_sciq_pairs_steps_per_second": 0.703, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_qasc_pairs_loss": 0.09173130989074707, |
|
"eval_qasc_pairs_runtime": 0.6608, |
|
"eval_qasc_pairs_samples_per_second": 193.694, |
|
"eval_qasc_pairs_steps_per_second": 3.026, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_openbookqa_pairs_loss": 0.6921954154968262, |
|
"eval_openbookqa_pairs_runtime": 0.5893, |
|
"eval_openbookqa_pairs_samples_per_second": 217.196, |
|
"eval_openbookqa_pairs_steps_per_second": 3.394, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_msmarco_pairs_loss": 0.15177518129348755, |
|
"eval_msmarco_pairs_runtime": 1.494, |
|
"eval_msmarco_pairs_samples_per_second": 85.673, |
|
"eval_msmarco_pairs_steps_per_second": 1.339, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_nq_pairs_loss": 0.10136909037828445, |
|
"eval_nq_pairs_runtime": 2.3524, |
|
"eval_nq_pairs_samples_per_second": 54.413, |
|
"eval_nq_pairs_steps_per_second": 0.85, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_trivia_pairs_loss": 0.5301617980003357, |
|
"eval_trivia_pairs_runtime": 3.5809, |
|
"eval_trivia_pairs_samples_per_second": 35.745, |
|
"eval_trivia_pairs_steps_per_second": 0.559, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_gooaq_pairs_loss": 0.28424739837646484, |
|
"eval_gooaq_pairs_runtime": 0.9167, |
|
"eval_gooaq_pairs_samples_per_second": 139.635, |
|
"eval_gooaq_pairs_steps_per_second": 2.182, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7078651685393258, |
|
"eval_paws-pos_loss": 0.023981213569641113, |
|
"eval_paws-pos_runtime": 0.6966, |
|
"eval_paws-pos_samples_per_second": 183.744, |
|
"eval_paws-pos_steps_per_second": 2.871, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.7101123595505618, |
|
"grad_norm": 3.6374969482421875, |
|
"learning_rate": 3.248385388013984e-05, |
|
"loss": 0.841, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.7123595505617978, |
|
"grad_norm": 4.251607418060303, |
|
"learning_rate": 3.2448410032329716e-05, |
|
"loss": 0.5849, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.7146067415730337, |
|
"grad_norm": 4.323038101196289, |
|
"learning_rate": 3.241274329778117e-05, |
|
"loss": 0.6818, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.7168539325842697, |
|
"grad_norm": 4.027289867401123, |
|
"learning_rate": 3.237685437088251e-05, |
|
"loss": 0.8269, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.7191011235955056, |
|
"grad_norm": 3.014479875564575, |
|
"learning_rate": 3.234074395034787e-05, |
|
"loss": 0.6979, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7213483146067415, |
|
"grad_norm": 3.5980277061462402, |
|
"learning_rate": 3.2304412739203595e-05, |
|
"loss": 0.3218, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.7235955056179775, |
|
"grad_norm": 3.2924134731292725, |
|
"learning_rate": 3.226786144477456e-05, |
|
"loss": 0.8206, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.7258426966292135, |
|
"grad_norm": 2.524231195449829, |
|
"learning_rate": 3.2231090778670385e-05, |
|
"loss": 0.2106, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.7280898876404495, |
|
"grad_norm": 5.464061260223389, |
|
"learning_rate": 3.2194101456771604e-05, |
|
"loss": 1.0524, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.7303370786516854, |
|
"grad_norm": 3.4692578315734863, |
|
"learning_rate": 3.215689419921572e-05, |
|
"loss": 0.3774, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7325842696629213, |
|
"grad_norm": 4.947183132171631, |
|
"learning_rate": 3.211946973038315e-05, |
|
"loss": 0.9098, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.7348314606741573, |
|
"grad_norm": 4.432866096496582, |
|
"learning_rate": 3.208182877888319e-05, |
|
"loss": 0.7988, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.7370786516853932, |
|
"grad_norm": 4.585951328277588, |
|
"learning_rate": 3.204397207753978e-05, |
|
"loss": 0.7916, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.7393258426966293, |
|
"grad_norm": 3.7288637161254883, |
|
"learning_rate": 3.200590036337724e-05, |
|
"loss": 0.6314, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.7415730337078652, |
|
"grad_norm": 3.840074300765991, |
|
"learning_rate": 3.196761437760593e-05, |
|
"loss": 0.8628, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7438202247191011, |
|
"grad_norm": 0.6423048377037048, |
|
"learning_rate": 3.192911486560784e-05, |
|
"loss": 0.0688, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.7460674157303371, |
|
"grad_norm": 4.148509502410889, |
|
"learning_rate": 3.1890402576922036e-05, |
|
"loss": 0.7386, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.748314606741573, |
|
"grad_norm": 4.7345147132873535, |
|
"learning_rate": 3.1851478265230103e-05, |
|
"loss": 0.8458, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.750561797752809, |
|
"grad_norm": 0.695708155632019, |
|
"learning_rate": 3.181234268834144e-05, |
|
"loss": 0.0442, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.7528089887640449, |
|
"grad_norm": 3.434741735458374, |
|
"learning_rate": 3.177299660817856e-05, |
|
"loss": 0.317, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.755056179775281, |
|
"grad_norm": 3.306964874267578, |
|
"learning_rate": 3.1733440790762176e-05, |
|
"loss": 0.8087, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.7573033707865169, |
|
"grad_norm": 3.010828733444214, |
|
"learning_rate": 3.169367600619637e-05, |
|
"loss": 0.3398, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.7595505617977528, |
|
"grad_norm": 4.152151584625244, |
|
"learning_rate": 3.1653703028653545e-05, |
|
"loss": 0.699, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.7617977528089888, |
|
"grad_norm": 4.073326110839844, |
|
"learning_rate": 3.161352263635937e-05, |
|
"loss": 0.7901, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.7640449438202247, |
|
"grad_norm": 4.365633487701416, |
|
"learning_rate": 3.157313561157764e-05, |
|
"loss": 0.8072, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7662921348314606, |
|
"grad_norm": 3.506556272506714, |
|
"learning_rate": 3.153254274059501e-05, |
|
"loss": 0.5939, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.7685393258426966, |
|
"grad_norm": 4.319092273712158, |
|
"learning_rate": 3.149174481370575e-05, |
|
"loss": 0.6933, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.7707865168539326, |
|
"grad_norm": 0.6184964179992676, |
|
"learning_rate": 3.145074262519629e-05, |
|
"loss": 0.0437, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.7730337078651686, |
|
"grad_norm": 4.866581916809082, |
|
"learning_rate": 3.140953697332979e-05, |
|
"loss": 0.9882, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.7752808988764045, |
|
"grad_norm": 3.9585559368133545, |
|
"learning_rate": 3.136812866033063e-05, |
|
"loss": 0.3707, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7775280898876404, |
|
"grad_norm": 4.253391265869141, |
|
"learning_rate": 3.132651849236871e-05, |
|
"loss": 0.7103, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.7797752808988764, |
|
"grad_norm": 0.5847011208534241, |
|
"learning_rate": 3.128470727954383e-05, |
|
"loss": 0.0372, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.7820224719101123, |
|
"grad_norm": 0.5127836465835571, |
|
"learning_rate": 3.124269583586989e-05, |
|
"loss": 0.028, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.7842696629213484, |
|
"grad_norm": 4.145182132720947, |
|
"learning_rate": 3.120048497925904e-05, |
|
"loss": 0.7676, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.7865168539325843, |
|
"grad_norm": 4.833105087280273, |
|
"learning_rate": 3.1158075531505755e-05, |
|
"loss": 0.6754, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7887640449438202, |
|
"grad_norm": 0.49345946311950684, |
|
"learning_rate": 3.1115468318270844e-05, |
|
"loss": 0.0439, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.7910112359550562, |
|
"grad_norm": 3.357720375061035, |
|
"learning_rate": 3.107266416906538e-05, |
|
"loss": 0.8039, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.7932584269662921, |
|
"grad_norm": 0.2371903359889984, |
|
"learning_rate": 3.1029663917234514e-05, |
|
"loss": 0.0104, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.7955056179775281, |
|
"grad_norm": 0.48881796002388, |
|
"learning_rate": 3.098646839994132e-05, |
|
"loss": 0.0555, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.797752808988764, |
|
"grad_norm": 3.3021090030670166, |
|
"learning_rate": 3.094307845815042e-05, |
|
"loss": 0.8646, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.0412533283233643, |
|
"learning_rate": 3.0899494936611663e-05, |
|
"loss": 0.7781, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.802247191011236, |
|
"grad_norm": 0.30917835235595703, |
|
"learning_rate": 3.085571868384366e-05, |
|
"loss": 0.011, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.8044943820224719, |
|
"grad_norm": 3.6957950592041016, |
|
"learning_rate": 3.081175055211726e-05, |
|
"loss": 0.3267, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.8067415730337079, |
|
"grad_norm": 7.202300071716309, |
|
"learning_rate": 3.0767591397438974e-05, |
|
"loss": 2.5281, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"grad_norm": 2.9833834171295166, |
|
"learning_rate": 3.072324207953429e-05, |
|
"loss": 0.301, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_VitaminC_cosine_accuracy": 0.55859375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.6793336868286133, |
|
"eval_VitaminC_cosine_ap": 0.5555632752592039, |
|
"eval_VitaminC_cosine_f1": 0.6657824933687002, |
|
"eval_VitaminC_cosine_f1_threshold": 0.28029173612594604, |
|
"eval_VitaminC_cosine_precision": 0.4990059642147117, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.55078125, |
|
"eval_VitaminC_dot_accuracy_threshold": 265.5102844238281, |
|
"eval_VitaminC_dot_ap": 0.5326105108889087, |
|
"eval_VitaminC_dot_f1": 0.6675531914893617, |
|
"eval_VitaminC_dot_f1_threshold": 106.37774658203125, |
|
"eval_VitaminC_dot_precision": 0.500998003992016, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.55859375, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 15.296594619750977, |
|
"eval_VitaminC_euclidean_ap": 0.5592294311948881, |
|
"eval_VitaminC_euclidean_f1": 0.6657824933687002, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.58568572998047, |
|
"eval_VitaminC_euclidean_precision": 0.4990059642147117, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.556640625, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 306.79913330078125, |
|
"eval_VitaminC_manhattan_ap": 0.5598941655081213, |
|
"eval_VitaminC_manhattan_f1": 0.6649006622516557, |
|
"eval_VitaminC_manhattan_f1_threshold": 512.0101318359375, |
|
"eval_VitaminC_manhattan_precision": 0.498015873015873, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 306.79913330078125, |
|
"eval_VitaminC_max_ap": 0.5598941655081213, |
|
"eval_VitaminC_max_f1": 0.6675531914893617, |
|
"eval_VitaminC_max_f1_threshold": 512.0101318359375, |
|
"eval_VitaminC_max_precision": 0.500998003992016, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5598941655081213, |
|
"eval_sts-test_pearson_cosine": 0.8832151520369376, |
|
"eval_sts-test_pearson_dot": 0.8763916954110884, |
|
"eval_sts-test_pearson_euclidean": 0.9046869354209082, |
|
"eval_sts-test_pearson_manhattan": 0.9047119917370259, |
|
"eval_sts-test_pearson_max": 0.9047119917370259, |
|
"eval_sts-test_spearman_cosine": 0.9054341922225841, |
|
"eval_sts-test_spearman_dot": 0.8786041104705073, |
|
"eval_sts-test_spearman_euclidean": 0.9002407635868509, |
|
"eval_sts-test_spearman_manhattan": 0.9006719867416183, |
|
"eval_sts-test_spearman_max": 0.9054341922225841, |
|
"eval_vitaminc-pairs_loss": 1.4290639162063599, |
|
"eval_vitaminc-pairs_runtime": 1.8905, |
|
"eval_vitaminc-pairs_samples_per_second": 57.128, |
|
"eval_vitaminc-pairs_steps_per_second": 1.058, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_negation-triplets_loss": 0.9030703902244568, |
|
"eval_negation-triplets_runtime": 0.2986, |
|
"eval_negation-triplets_samples_per_second": 214.299, |
|
"eval_negation-triplets_steps_per_second": 3.348, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_scitail-pairs-pos_loss": 0.10728535801172256, |
|
"eval_scitail-pairs-pos_runtime": 0.3831, |
|
"eval_scitail-pairs-pos_samples_per_second": 140.965, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.61, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_scitail-pairs-qa_loss": 0.0005650219391100109, |
|
"eval_scitail-pairs-qa_runtime": 0.5259, |
|
"eval_scitail-pairs-qa_samples_per_second": 243.397, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.803, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_xsum-pairs_loss": 0.025990577414631844, |
|
"eval_xsum-pairs_runtime": 2.734, |
|
"eval_xsum-pairs_samples_per_second": 46.818, |
|
"eval_xsum-pairs_steps_per_second": 0.732, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_sciq_pairs_loss": 0.016017427667975426, |
|
"eval_sciq_pairs_runtime": 2.8252, |
|
"eval_sciq_pairs_samples_per_second": 45.307, |
|
"eval_sciq_pairs_steps_per_second": 0.708, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_qasc_pairs_loss": 0.10250324755907059, |
|
"eval_qasc_pairs_runtime": 0.6511, |
|
"eval_qasc_pairs_samples_per_second": 196.585, |
|
"eval_qasc_pairs_steps_per_second": 3.072, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_openbookqa_pairs_loss": 0.6710968613624573, |
|
"eval_openbookqa_pairs_runtime": 0.5776, |
|
"eval_openbookqa_pairs_samples_per_second": 221.625, |
|
"eval_openbookqa_pairs_steps_per_second": 3.463, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_msmarco_pairs_loss": 0.14522777497768402, |
|
"eval_msmarco_pairs_runtime": 1.4981, |
|
"eval_msmarco_pairs_samples_per_second": 85.441, |
|
"eval_msmarco_pairs_steps_per_second": 1.335, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_nq_pairs_loss": 0.10225611180067062, |
|
"eval_nq_pairs_runtime": 2.3595, |
|
"eval_nq_pairs_samples_per_second": 54.248, |
|
"eval_nq_pairs_steps_per_second": 0.848, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_trivia_pairs_loss": 0.5312957167625427, |
|
"eval_trivia_pairs_runtime": 3.5813, |
|
"eval_trivia_pairs_samples_per_second": 35.741, |
|
"eval_trivia_pairs_steps_per_second": 0.558, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_gooaq_pairs_loss": 0.27713337540626526, |
|
"eval_gooaq_pairs_runtime": 0.9166, |
|
"eval_gooaq_pairs_samples_per_second": 139.645, |
|
"eval_gooaq_pairs_steps_per_second": 2.182, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8089887640449438, |
|
"eval_paws-pos_loss": 0.024326296523213387, |
|
"eval_paws-pos_runtime": 0.6893, |
|
"eval_paws-pos_samples_per_second": 185.682, |
|
"eval_paws-pos_steps_per_second": 2.901, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8112359550561797, |
|
"grad_norm": 4.372533798217773, |
|
"learning_rate": 3.067870346183096e-05, |
|
"loss": 0.7533, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.8134831460674158, |
|
"grad_norm": 2.6585452556610107, |
|
"learning_rate": 3.063397641144216e-05, |
|
"loss": 0.2958, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.8157303370786517, |
|
"grad_norm": 4.378647327423096, |
|
"learning_rate": 3.058906179914962e-05, |
|
"loss": 0.8296, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.8179775280898877, |
|
"grad_norm": 3.1601309776306152, |
|
"learning_rate": 3.0543960499386694e-05, |
|
"loss": 0.3191, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.8202247191011236, |
|
"grad_norm": 3.446498394012451, |
|
"learning_rate": 3.049867339022129e-05, |
|
"loss": 0.7866, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.8224719101123595, |
|
"grad_norm": 3.0058486461639404, |
|
"learning_rate": 3.0453201353338826e-05, |
|
"loss": 0.3157, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.8247191011235955, |
|
"grad_norm": 4.380611419677734, |
|
"learning_rate": 3.040754527402502e-05, |
|
"loss": 0.7402, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.8269662921348314, |
|
"grad_norm": 3.8081209659576416, |
|
"learning_rate": 3.036170604114869e-05, |
|
"loss": 0.4957, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.8292134831460675, |
|
"grad_norm": 4.2056989669799805, |
|
"learning_rate": 3.031568454714442e-05, |
|
"loss": 0.8505, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.8314606741573034, |
|
"grad_norm": 3.101804733276367, |
|
"learning_rate": 3.0269481687995207e-05, |
|
"loss": 0.7702, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8337078651685393, |
|
"grad_norm": 4.0704345703125, |
|
"learning_rate": 3.0223098363215002e-05, |
|
"loss": 0.7591, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.8359550561797753, |
|
"grad_norm": 2.9631364345550537, |
|
"learning_rate": 3.0176535475831208e-05, |
|
"loss": 0.727, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.8382022471910112, |
|
"grad_norm": 3.3760929107666016, |
|
"learning_rate": 3.01297939323671e-05, |
|
"loss": 0.3233, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.8404494382022472, |
|
"grad_norm": 4.116260051727295, |
|
"learning_rate": 3.0082874642824164e-05, |
|
"loss": 0.8738, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8426966292134831, |
|
"grad_norm": 0.40298929810523987, |
|
"learning_rate": 3.0035778520664388e-05, |
|
"loss": 0.0393, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8449438202247191, |
|
"grad_norm": 3.0647614002227783, |
|
"learning_rate": 2.9988506482792485e-05, |
|
"loss": 0.7454, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8471910112359551, |
|
"grad_norm": 2.951953649520874, |
|
"learning_rate": 2.994105944953803e-05, |
|
"loss": 0.8297, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.849438202247191, |
|
"grad_norm": 4.049951553344727, |
|
"learning_rate": 2.9893438344637538e-05, |
|
"loss": 0.7802, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.851685393258427, |
|
"grad_norm": 3.7383949756622314, |
|
"learning_rate": 2.984564409521651e-05, |
|
"loss": 0.6229, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8539325842696629, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.979767763177134e-05, |
|
"loss": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8561797752808988, |
|
"grad_norm": 3.399641513824463, |
|
"learning_rate": 2.9749539888151244e-05, |
|
"loss": 0.3506, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.8584269662921349, |
|
"grad_norm": 0.48723292350769043, |
|
"learning_rate": 2.9701231801540032e-05, |
|
"loss": 0.041, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8606741573033708, |
|
"grad_norm": 3.1171765327453613, |
|
"learning_rate": 2.9652754312437897e-05, |
|
"loss": 0.725, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8629213483146068, |
|
"grad_norm": 2.6491808891296387, |
|
"learning_rate": 2.9604108364643112e-05, |
|
"loss": 0.257, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8651685393258427, |
|
"grad_norm": 4.025605201721191, |
|
"learning_rate": 2.9555294905233606e-05, |
|
"loss": 0.7912, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8674157303370786, |
|
"grad_norm": 4.142299652099609, |
|
"learning_rate": 2.9506314884548583e-05, |
|
"loss": 0.8915, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.8696629213483146, |
|
"grad_norm": 2.943582534790039, |
|
"learning_rate": 2.945716925616998e-05, |
|
"loss": 0.779, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.8719101123595505, |
|
"grad_norm": 4.478114604949951, |
|
"learning_rate": 2.9407858976903913e-05, |
|
"loss": 0.7828, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8741573033707866, |
|
"grad_norm": 3.9878995418548584, |
|
"learning_rate": 2.935838500676207e-05, |
|
"loss": 0.7462, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8764044943820225, |
|
"grad_norm": 3.7733311653137207, |
|
"learning_rate": 2.9308748308942983e-05, |
|
"loss": 0.7913, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8786516853932584, |
|
"grad_norm": 3.179732322692871, |
|
"learning_rate": 2.9258949849813315e-05, |
|
"loss": 0.3209, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8808988764044944, |
|
"grad_norm": 3.6665351390838623, |
|
"learning_rate": 2.9208990598889008e-05, |
|
"loss": 0.5932, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8831460674157303, |
|
"grad_norm": 0.545093834400177, |
|
"learning_rate": 2.9158871528816442e-05, |
|
"loss": 0.0613, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8853932584269663, |
|
"grad_norm": 5.226474285125732, |
|
"learning_rate": 2.9108593615353467e-05, |
|
"loss": 0.8802, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8876404494382022, |
|
"grad_norm": 3.691817283630371, |
|
"learning_rate": 2.9058157837350437e-05, |
|
"loss": 0.6116, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8898876404494382, |
|
"grad_norm": 0.4754512906074524, |
|
"learning_rate": 2.900756517673113e-05, |
|
"loss": 0.0537, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8921348314606742, |
|
"grad_norm": 2.874117374420166, |
|
"learning_rate": 2.8956816618473647e-05, |
|
"loss": 0.3006, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.8943820224719101, |
|
"grad_norm": 3.8957912921905518, |
|
"learning_rate": 2.890591315059121e-05, |
|
"loss": 0.7636, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8966292134831461, |
|
"grad_norm": 3.7385432720184326, |
|
"learning_rate": 2.8854855764112973e-05, |
|
"loss": 0.612, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.898876404494382, |
|
"grad_norm": 3.7403082847595215, |
|
"learning_rate": 2.880364545306468e-05, |
|
"loss": 0.54, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9011235955056179, |
|
"grad_norm": 2.7360849380493164, |
|
"learning_rate": 2.8752283214449328e-05, |
|
"loss": 0.2761, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.903370786516854, |
|
"grad_norm": 8.988025665283203, |
|
"learning_rate": 2.8700770048227775e-05, |
|
"loss": 1.2668, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9056179775280899, |
|
"grad_norm": 3.411295175552368, |
|
"learning_rate": 2.864910695729925e-05, |
|
"loss": 0.8066, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9078651685393259, |
|
"grad_norm": 0.3018481135368347, |
|
"learning_rate": 2.8597294947481834e-05, |
|
"loss": 0.0094, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"grad_norm": 4.116438388824463, |
|
"learning_rate": 2.8545335027492885e-05, |
|
"loss": 0.673, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_VitaminC_cosine_accuracy": 0.55859375, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7188639044761658, |
|
"eval_VitaminC_cosine_ap": 0.5516905675485202, |
|
"eval_VitaminC_cosine_f1": 0.6675712347354138, |
|
"eval_VitaminC_cosine_f1_threshold": 0.42514583468437195, |
|
"eval_VitaminC_cosine_precision": 0.5061728395061729, |
|
"eval_VitaminC_cosine_recall": 0.9800796812749004, |
|
"eval_VitaminC_dot_accuracy": 0.548828125, |
|
"eval_VitaminC_dot_accuracy_threshold": 320.3775329589844, |
|
"eval_VitaminC_dot_ap": 0.5343066680873013, |
|
"eval_VitaminC_dot_f1": 0.6720867208672087, |
|
"eval_VitaminC_dot_f1_threshold": 152.709716796875, |
|
"eval_VitaminC_dot_precision": 0.5092402464065708, |
|
"eval_VitaminC_dot_recall": 0.9880478087649402, |
|
"eval_VitaminC_euclidean_accuracy": 0.556640625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 15.12228775024414, |
|
"eval_VitaminC_euclidean_ap": 0.5542894540784595, |
|
"eval_VitaminC_euclidean_f1": 0.6640211640211641, |
|
"eval_VitaminC_euclidean_f1_threshold": 24.3716983795166, |
|
"eval_VitaminC_euclidean_precision": 0.497029702970297, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.55859375, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 305.93597412109375, |
|
"eval_VitaminC_manhattan_ap": 0.5533328154567183, |
|
"eval_VitaminC_manhattan_f1": 0.6649006622516557, |
|
"eval_VitaminC_manhattan_f1_threshold": 509.4247741699219, |
|
"eval_VitaminC_manhattan_precision": 0.498015873015873, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 320.3775329589844, |
|
"eval_VitaminC_max_ap": 0.5542894540784595, |
|
"eval_VitaminC_max_f1": 0.6720867208672087, |
|
"eval_VitaminC_max_f1_threshold": 509.4247741699219, |
|
"eval_VitaminC_max_precision": 0.5092402464065708, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5542894540784595, |
|
"eval_sts-test_pearson_cosine": 0.8820726638294588, |
|
"eval_sts-test_pearson_dot": 0.8723940521896922, |
|
"eval_sts-test_pearson_euclidean": 0.9038814103150634, |
|
"eval_sts-test_pearson_manhattan": 0.904449390563823, |
|
"eval_sts-test_pearson_max": 0.904449390563823, |
|
"eval_sts-test_spearman_cosine": 0.9051641183600871, |
|
"eval_sts-test_spearman_dot": 0.8721959088443044, |
|
"eval_sts-test_spearman_euclidean": 0.8999642007914521, |
|
"eval_sts-test_spearman_manhattan": 0.9005904051921018, |
|
"eval_sts-test_spearman_max": 0.9051641183600871, |
|
"eval_vitaminc-pairs_loss": 1.48486328125, |
|
"eval_vitaminc-pairs_runtime": 1.8874, |
|
"eval_vitaminc-pairs_samples_per_second": 57.222, |
|
"eval_vitaminc-pairs_steps_per_second": 1.06, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_negation-triplets_loss": 0.9023827314376831, |
|
"eval_negation-triplets_runtime": 0.302, |
|
"eval_negation-triplets_samples_per_second": 211.927, |
|
"eval_negation-triplets_steps_per_second": 3.311, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_scitail-pairs-pos_loss": 0.10495099425315857, |
|
"eval_scitail-pairs-pos_runtime": 0.3856, |
|
"eval_scitail-pairs-pos_samples_per_second": 140.031, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.593, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_scitail-pairs-qa_loss": 0.0008332311408594251, |
|
"eval_scitail-pairs-qa_runtime": 0.5224, |
|
"eval_scitail-pairs-qa_samples_per_second": 245.005, |
|
"eval_scitail-pairs-qa_steps_per_second": 3.828, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_xsum-pairs_loss": 0.028531953692436218, |
|
"eval_xsum-pairs_runtime": 2.7425, |
|
"eval_xsum-pairs_samples_per_second": 46.672, |
|
"eval_xsum-pairs_steps_per_second": 0.729, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_sciq_pairs_loss": 0.015175853855907917, |
|
"eval_sciq_pairs_runtime": 2.8294, |
|
"eval_sciq_pairs_samples_per_second": 45.239, |
|
"eval_sciq_pairs_steps_per_second": 0.707, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_qasc_pairs_loss": 0.09416583180427551, |
|
"eval_qasc_pairs_runtime": 0.6538, |
|
"eval_qasc_pairs_samples_per_second": 195.781, |
|
"eval_qasc_pairs_steps_per_second": 3.059, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_openbookqa_pairs_loss": 0.715216875076294, |
|
"eval_openbookqa_pairs_runtime": 0.578, |
|
"eval_openbookqa_pairs_samples_per_second": 221.449, |
|
"eval_openbookqa_pairs_steps_per_second": 3.46, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_msmarco_pairs_loss": 0.1417744755744934, |
|
"eval_msmarco_pairs_runtime": 1.4882, |
|
"eval_msmarco_pairs_samples_per_second": 86.012, |
|
"eval_msmarco_pairs_steps_per_second": 1.344, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_nq_pairs_loss": 0.10870223492383957, |
|
"eval_nq_pairs_runtime": 2.3451, |
|
"eval_nq_pairs_samples_per_second": 54.583, |
|
"eval_nq_pairs_steps_per_second": 0.853, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_trivia_pairs_loss": 0.49194595217704773, |
|
"eval_trivia_pairs_runtime": 3.5796, |
|
"eval_trivia_pairs_samples_per_second": 35.759, |
|
"eval_trivia_pairs_steps_per_second": 0.559, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_gooaq_pairs_loss": 0.2616226375102997, |
|
"eval_gooaq_pairs_runtime": 0.9137, |
|
"eval_gooaq_pairs_samples_per_second": 140.093, |
|
"eval_gooaq_pairs_steps_per_second": 2.189, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9101123595505618, |
|
"eval_paws-pos_loss": 0.02422034554183483, |
|
"eval_paws-pos_runtime": 0.6895, |
|
"eval_paws-pos_samples_per_second": 185.641, |
|
"eval_paws-pos_steps_per_second": 2.901, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9123595505617977, |
|
"grad_norm": 3.427104949951172, |
|
"learning_rate": 2.8493228208929387e-05, |
|
"loss": 0.5189, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.9146067415730337, |
|
"grad_norm": 4.941195487976074, |
|
"learning_rate": 2.8440975506248268e-05, |
|
"loss": 0.649, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.9168539325842696, |
|
"grad_norm": 2.7992403507232666, |
|
"learning_rate": 2.8388577936746633e-05, |
|
"loss": 0.2982, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.9191011235955057, |
|
"grad_norm": 3.8877484798431396, |
|
"learning_rate": 2.833603652054199e-05, |
|
"loss": 0.7511, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.9213483146067416, |
|
"grad_norm": 3.2458090782165527, |
|
"learning_rate": 2.8283352280552348e-05, |
|
"loss": 0.5164, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9235955056179775, |
|
"grad_norm": 3.7385945320129395, |
|
"learning_rate": 2.8230526242476332e-05, |
|
"loss": 0.5924, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.9258426966292135, |
|
"grad_norm": 4.369627952575684, |
|
"learning_rate": 2.8177559434773203e-05, |
|
"loss": 0.8191, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.9280898876404494, |
|
"grad_norm": 2.95206356048584, |
|
"learning_rate": 2.8124452888642838e-05, |
|
"loss": 0.2311, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.9303370786516854, |
|
"grad_norm": 3.984375238418579, |
|
"learning_rate": 2.8071207638005662e-05, |
|
"loss": 0.7421, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9325842696629213, |
|
"grad_norm": 3.0188541412353516, |
|
"learning_rate": 2.801782471948248e-05, |
|
"loss": 0.2936, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9348314606741573, |
|
"grad_norm": 4.104308605194092, |
|
"learning_rate": 2.7964305172374362e-05, |
|
"loss": 0.737, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9370786516853933, |
|
"grad_norm": 3.686523675918579, |
|
"learning_rate": 2.791065003864235e-05, |
|
"loss": 0.6539, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.9393258426966292, |
|
"grad_norm": 3.839590311050415, |
|
"learning_rate": 2.785686036288719e-05, |
|
"loss": 0.6855, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9415730337078652, |
|
"grad_norm": 4.174718856811523, |
|
"learning_rate": 2.780293719232902e-05, |
|
"loss": 0.8134, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.9438202247191011, |
|
"grad_norm": 4.046380043029785, |
|
"learning_rate": 2.7748881576786946e-05, |
|
"loss": 0.6885, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.946067415730337, |
|
"grad_norm": 3.4202940464019775, |
|
"learning_rate": 2.7694694568658613e-05, |
|
"loss": 0.5581, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9483146067415731, |
|
"grad_norm": 3.787081718444824, |
|
"learning_rate": 2.764037722289973e-05, |
|
"loss": 0.8029, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.950561797752809, |
|
"grad_norm": 3.870718240737915, |
|
"learning_rate": 2.7585930597003524e-05, |
|
"loss": 0.8126, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.952808988764045, |
|
"grad_norm": 3.1959424018859863, |
|
"learning_rate": 2.753135575098015e-05, |
|
"loss": 0.8425, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.9550561797752809, |
|
"grad_norm": 0.4186573922634125, |
|
"learning_rate": 2.7476653747336047e-05, |
|
"loss": 0.049, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9573033707865168, |
|
"grad_norm": 4.299917697906494, |
|
"learning_rate": 2.7421825651053265e-05, |
|
"loss": 0.7849, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9595505617977528, |
|
"grad_norm": 2.6435227394104004, |
|
"learning_rate": 2.736687252956873e-05, |
|
"loss": 0.068, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.9617977528089887, |
|
"grad_norm": 2.717653274536133, |
|
"learning_rate": 2.7311795452753443e-05, |
|
"loss": 0.2925, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.9640449438202248, |
|
"grad_norm": 3.6929807662963867, |
|
"learning_rate": 2.7256595492891683e-05, |
|
"loss": 0.777, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9662921348314607, |
|
"grad_norm": 2.8760790824890137, |
|
"learning_rate": 2.720127372466011e-05, |
|
"loss": 0.7397, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9685393258426966, |
|
"grad_norm": 0.03685740381479263, |
|
"learning_rate": 2.714583122510683e-05, |
|
"loss": 0.0007, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9707865168539326, |
|
"grad_norm": 4.058692455291748, |
|
"learning_rate": 2.709026907363047e-05, |
|
"loss": 0.8535, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9730337078651685, |
|
"grad_norm": 4.2914276123046875, |
|
"learning_rate": 2.703458835195911e-05, |
|
"loss": 0.7026, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9752808988764045, |
|
"grad_norm": 3.735518217086792, |
|
"learning_rate": 2.6978790144129262e-05, |
|
"loss": 0.7557, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9775280898876404, |
|
"grad_norm": 4.058504104614258, |
|
"learning_rate": 2.6922875536464747e-05, |
|
"loss": 0.7225, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9797752808988764, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6866845617555555e-05, |
|
"loss": 0.0, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.9820224719101124, |
|
"grad_norm": 5.648872375488281, |
|
"learning_rate": 2.6810701478236642e-05, |
|
"loss": 0.4131, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9842696629213483, |
|
"grad_norm": 2.7032744884490967, |
|
"learning_rate": 2.6754444211566702e-05, |
|
"loss": 0.2824, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9865168539325843, |
|
"grad_norm": 3.150801420211792, |
|
"learning_rate": 2.6698074912806882e-05, |
|
"loss": 0.3144, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9887640449438202, |
|
"grad_norm": 2.3572490215301514, |
|
"learning_rate": 2.6641594679399448e-05, |
|
"loss": 0.0509, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9910112359550561, |
|
"grad_norm": 3.2544448375701904, |
|
"learning_rate": 2.6585004610946452e-05, |
|
"loss": 0.7645, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9932584269662922, |
|
"grad_norm": 4.310440540313721, |
|
"learning_rate": 2.6528305809188273e-05, |
|
"loss": 0.2787, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9955056179775281, |
|
"grad_norm": 3.863487482070923, |
|
"learning_rate": 2.6471499377982225e-05, |
|
"loss": 0.64, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9977528089887641, |
|
"grad_norm": 6.1020612716674805, |
|
"learning_rate": 2.6414586423281017e-05, |
|
"loss": 0.4045, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.1245224475860596, |
|
"learning_rate": 2.6357568053111255e-05, |
|
"loss": 0.7661, |
|
"step": 445 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 890, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 89, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 640, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|