Training in progress, step 445, checkpoint
ef463a7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 45,
"global_step": 445,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0022471910112359553,
"grad_norm": 3.9492883682250977,
"learning_rate": 1.9662921348314604e-07,
"loss": 0.8103,
"step": 1
},
{
"epoch": 0.0044943820224719105,
"grad_norm": 4.117438793182373,
"learning_rate": 3.932584269662921e-07,
"loss": 0.8803,
"step": 2
},
{
"epoch": 0.006741573033707865,
"grad_norm": 3.809002161026001,
"learning_rate": 5.898876404494381e-07,
"loss": 0.8219,
"step": 3
},
{
"epoch": 0.008988764044943821,
"grad_norm": 0.7417504787445068,
"learning_rate": 7.865168539325842e-07,
"loss": 0.0574,
"step": 4
},
{
"epoch": 0.011235955056179775,
"grad_norm": 2.707460403442383,
"learning_rate": 9.831460674157302e-07,
"loss": 0.3044,
"step": 5
},
{
"epoch": 0.01348314606741573,
"grad_norm": 3.082705497741699,
"learning_rate": 1.1797752808988763e-06,
"loss": 0.3306,
"step": 6
},
{
"epoch": 0.015730337078651686,
"grad_norm": 3.102416753768921,
"learning_rate": 1.3764044943820223e-06,
"loss": 0.759,
"step": 7
},
{
"epoch": 0.017977528089887642,
"grad_norm": 0.6271047592163086,
"learning_rate": 1.5730337078651683e-06,
"loss": 0.0472,
"step": 8
},
{
"epoch": 0.020224719101123594,
"grad_norm": 3.1362593173980713,
"learning_rate": 1.7696629213483144e-06,
"loss": 0.7782,
"step": 9
},
{
"epoch": 0.02247191011235955,
"grad_norm": 1.124997615814209,
"learning_rate": 1.9662921348314604e-06,
"loss": 0.0757,
"step": 10
},
{
"epoch": 0.024719101123595506,
"grad_norm": 3.194413185119629,
"learning_rate": 2.1629213483146067e-06,
"loss": 0.7778,
"step": 11
},
{
"epoch": 0.02696629213483146,
"grad_norm": 3.966202974319458,
"learning_rate": 2.3595505617977525e-06,
"loss": 0.7111,
"step": 12
},
{
"epoch": 0.029213483146067417,
"grad_norm": 3.63393235206604,
"learning_rate": 2.5561797752808988e-06,
"loss": 0.6598,
"step": 13
},
{
"epoch": 0.03146067415730337,
"grad_norm": 4.087065696716309,
"learning_rate": 2.7528089887640446e-06,
"loss": 0.8901,
"step": 14
},
{
"epoch": 0.033707865168539325,
"grad_norm": 2.769573211669922,
"learning_rate": 2.949438202247191e-06,
"loss": 0.3206,
"step": 15
},
{
"epoch": 0.035955056179775284,
"grad_norm": 2.630620002746582,
"learning_rate": 3.1460674157303367e-06,
"loss": 0.3408,
"step": 16
},
{
"epoch": 0.038202247191011236,
"grad_norm": 2.9570937156677246,
"learning_rate": 3.342696629213483e-06,
"loss": 0.5623,
"step": 17
},
{
"epoch": 0.04044943820224719,
"grad_norm": 1.0999970436096191,
"learning_rate": 3.5393258426966288e-06,
"loss": 0.0758,
"step": 18
},
{
"epoch": 0.04269662921348315,
"grad_norm": 5.516472816467285,
"learning_rate": 3.735955056179775e-06,
"loss": 0.994,
"step": 19
},
{
"epoch": 0.0449438202247191,
"grad_norm": 6.245299816131592,
"learning_rate": 3.932584269662921e-06,
"loss": 2.4196,
"step": 20
},
{
"epoch": 0.04719101123595506,
"grad_norm": 0.546605110168457,
"learning_rate": 4.129213483146067e-06,
"loss": 0.0561,
"step": 21
},
{
"epoch": 0.04943820224719101,
"grad_norm": 0.7049635648727417,
"learning_rate": 4.325842696629213e-06,
"loss": 0.0827,
"step": 22
},
{
"epoch": 0.051685393258426963,
"grad_norm": 3.1022439002990723,
"learning_rate": 4.522471910112359e-06,
"loss": 0.7405,
"step": 23
},
{
"epoch": 0.05393258426966292,
"grad_norm": 4.534759044647217,
"learning_rate": 4.719101123595505e-06,
"loss": 0.9656,
"step": 24
},
{
"epoch": 0.056179775280898875,
"grad_norm": 3.0486032962799072,
"learning_rate": 4.915730337078652e-06,
"loss": 0.7855,
"step": 25
},
{
"epoch": 0.058426966292134834,
"grad_norm": 3.7457478046417236,
"learning_rate": 5.1123595505617975e-06,
"loss": 0.6349,
"step": 26
},
{
"epoch": 0.060674157303370786,
"grad_norm": 3.2051479816436768,
"learning_rate": 5.308988764044943e-06,
"loss": 0.8087,
"step": 27
},
{
"epoch": 0.06292134831460675,
"grad_norm": 4.389094829559326,
"learning_rate": 5.505617977528089e-06,
"loss": 0.9282,
"step": 28
},
{
"epoch": 0.0651685393258427,
"grad_norm": 2.920410394668579,
"learning_rate": 5.702247191011236e-06,
"loss": 0.3377,
"step": 29
},
{
"epoch": 0.06741573033707865,
"grad_norm": 2.7193148136138916,
"learning_rate": 5.898876404494382e-06,
"loss": 0.3289,
"step": 30
},
{
"epoch": 0.0696629213483146,
"grad_norm": 4.0008225440979,
"learning_rate": 6.0955056179775275e-06,
"loss": 0.6314,
"step": 31
},
{
"epoch": 0.07191011235955057,
"grad_norm": 0.5842159390449524,
"learning_rate": 6.292134831460673e-06,
"loss": 0.0611,
"step": 32
},
{
"epoch": 0.07415730337078652,
"grad_norm": 3.1256043910980225,
"learning_rate": 6.48876404494382e-06,
"loss": 0.8942,
"step": 33
},
{
"epoch": 0.07640449438202247,
"grad_norm": 0.9526051878929138,
"learning_rate": 6.685393258426966e-06,
"loss": 0.0701,
"step": 34
},
{
"epoch": 0.07865168539325842,
"grad_norm": 4.061926364898682,
"learning_rate": 6.882022471910112e-06,
"loss": 0.8506,
"step": 35
},
{
"epoch": 0.08089887640449438,
"grad_norm": 2.8898491859436035,
"learning_rate": 7.0786516853932575e-06,
"loss": 0.3386,
"step": 36
},
{
"epoch": 0.08314606741573034,
"grad_norm": 0.9806709289550781,
"learning_rate": 7.275280898876404e-06,
"loss": 0.0701,
"step": 37
},
{
"epoch": 0.0853932584269663,
"grad_norm": 3.8004391193389893,
"learning_rate": 7.47191011235955e-06,
"loss": 0.8042,
"step": 38
},
{
"epoch": 0.08764044943820225,
"grad_norm": 4.089083194732666,
"learning_rate": 7.668539325842697e-06,
"loss": 0.8744,
"step": 39
},
{
"epoch": 0.0898876404494382,
"grad_norm": 3.419440984725952,
"learning_rate": 7.865168539325842e-06,
"loss": 0.8644,
"step": 40
},
{
"epoch": 0.09213483146067415,
"grad_norm": 4.094921588897705,
"learning_rate": 8.061797752808988e-06,
"loss": 0.8647,
"step": 41
},
{
"epoch": 0.09438202247191012,
"grad_norm": 3.9199764728546143,
"learning_rate": 8.258426966292133e-06,
"loss": 0.7916,
"step": 42
},
{
"epoch": 0.09662921348314607,
"grad_norm": 4.082360744476318,
"learning_rate": 8.45505617977528e-06,
"loss": 0.8599,
"step": 43
},
{
"epoch": 0.09887640449438202,
"grad_norm": 0.6443855166435242,
"learning_rate": 8.651685393258427e-06,
"loss": 0.0523,
"step": 44
},
{
"epoch": 0.10112359550561797,
"grad_norm": 4.051048278808594,
"learning_rate": 8.848314606741572e-06,
"loss": 0.6968,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.8300318121910095,
"eval_VitaminC_cosine_ap": 0.5514483751609435,
"eval_VitaminC_cosine_f1": 0.6657718120805369,
"eval_VitaminC_cosine_f1_threshold": 0.37456807494163513,
"eval_VitaminC_cosine_precision": 0.5020242914979757,
"eval_VitaminC_cosine_recall": 0.9880478087649402,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 311.380615234375,
"eval_VitaminC_dot_ap": 0.5333497363350208,
"eval_VitaminC_dot_f1": 0.6684709066305818,
"eval_VitaminC_dot_f1_threshold": 144.8927001953125,
"eval_VitaminC_dot_precision": 0.5061475409836066,
"eval_VitaminC_dot_recall": 0.9840637450199203,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 11.028482437133789,
"eval_VitaminC_euclidean_ap": 0.5544340410314673,
"eval_VitaminC_euclidean_f1": 0.6649006622516557,
"eval_VitaminC_euclidean_f1_threshold": 23.38451385498047,
"eval_VitaminC_euclidean_precision": 0.498015873015873,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.556640625,
"eval_VitaminC_manhattan_accuracy_threshold": 232.38790893554688,
"eval_VitaminC_manhattan_ap": 0.5515569514532939,
"eval_VitaminC_manhattan_f1": 0.6649006622516557,
"eval_VitaminC_manhattan_f1_threshold": 498.126220703125,
"eval_VitaminC_manhattan_precision": 0.498015873015873,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 311.380615234375,
"eval_VitaminC_max_ap": 0.5544340410314673,
"eval_VitaminC_max_f1": 0.6684709066305818,
"eval_VitaminC_max_f1_threshold": 498.126220703125,
"eval_VitaminC_max_precision": 0.5061475409836066,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5544340410314673,
"eval_sts-test_pearson_cosine": 0.8803067271464453,
"eval_sts-test_pearson_dot": 0.8698285291814508,
"eval_sts-test_pearson_euclidean": 0.9023937835918766,
"eval_sts-test_pearson_manhattan": 0.9020751259156048,
"eval_sts-test_pearson_max": 0.9023937835918766,
"eval_sts-test_spearman_cosine": 0.9038005474254912,
"eval_sts-test_spearman_dot": 0.8707897794601254,
"eval_sts-test_spearman_euclidean": 0.8989733631129851,
"eval_sts-test_spearman_manhattan": 0.8980189529612906,
"eval_sts-test_spearman_max": 0.9038005474254912,
"eval_vitaminc-pairs_loss": 1.7273772954940796,
"eval_vitaminc-pairs_runtime": 1.8924,
"eval_vitaminc-pairs_samples_per_second": 57.071,
"eval_vitaminc-pairs_steps_per_second": 1.057,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_negation-triplets_loss": 0.9174526929855347,
"eval_negation-triplets_runtime": 0.2972,
"eval_negation-triplets_samples_per_second": 215.314,
"eval_negation-triplets_steps_per_second": 3.364,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_scitail-pairs-pos_loss": 0.07368183881044388,
"eval_scitail-pairs-pos_runtime": 0.379,
"eval_scitail-pairs-pos_samples_per_second": 142.492,
"eval_scitail-pairs-pos_steps_per_second": 2.639,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_scitail-pairs-qa_loss": 0.001584450714290142,
"eval_scitail-pairs-qa_runtime": 0.5178,
"eval_scitail-pairs-qa_samples_per_second": 247.198,
"eval_scitail-pairs-qa_steps_per_second": 3.862,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_xsum-pairs_loss": 0.038235221058130264,
"eval_xsum-pairs_runtime": 2.7268,
"eval_xsum-pairs_samples_per_second": 46.941,
"eval_xsum-pairs_steps_per_second": 0.733,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_sciq_pairs_loss": 0.01538097020238638,
"eval_sciq_pairs_runtime": 2.7808,
"eval_sciq_pairs_samples_per_second": 46.029,
"eval_sciq_pairs_steps_per_second": 0.719,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_qasc_pairs_loss": 0.09078988432884216,
"eval_qasc_pairs_runtime": 0.6473,
"eval_qasc_pairs_samples_per_second": 197.758,
"eval_qasc_pairs_steps_per_second": 3.09,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_openbookqa_pairs_loss": 0.6754768490791321,
"eval_openbookqa_pairs_runtime": 0.573,
"eval_openbookqa_pairs_samples_per_second": 223.397,
"eval_openbookqa_pairs_steps_per_second": 3.491,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_msmarco_pairs_loss": 0.15991328656673431,
"eval_msmarco_pairs_runtime": 1.487,
"eval_msmarco_pairs_samples_per_second": 86.078,
"eval_msmarco_pairs_steps_per_second": 1.345,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_nq_pairs_loss": 0.09591890126466751,
"eval_nq_pairs_runtime": 2.3943,
"eval_nq_pairs_samples_per_second": 53.459,
"eval_nq_pairs_steps_per_second": 0.835,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_trivia_pairs_loss": 0.5305934548377991,
"eval_trivia_pairs_runtime": 3.5752,
"eval_trivia_pairs_samples_per_second": 35.802,
"eval_trivia_pairs_steps_per_second": 0.559,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_gooaq_pairs_loss": 0.29681000113487244,
"eval_gooaq_pairs_runtime": 0.9087,
"eval_gooaq_pairs_samples_per_second": 140.861,
"eval_gooaq_pairs_steps_per_second": 2.201,
"step": 45
},
{
"epoch": 0.10112359550561797,
"eval_paws-pos_loss": 0.024501051753759384,
"eval_paws-pos_runtime": 0.6773,
"eval_paws-pos_samples_per_second": 188.996,
"eval_paws-pos_steps_per_second": 2.953,
"step": 45
},
{
"epoch": 0.10337078651685393,
"grad_norm": 2.9021923542022705,
"learning_rate": 9.044943820224718e-06,
"loss": 0.3376,
"step": 46
},
{
"epoch": 0.10561797752808989,
"grad_norm": 3.179288625717163,
"learning_rate": 9.241573033707863e-06,
"loss": 0.5174,
"step": 47
},
{
"epoch": 0.10786516853932585,
"grad_norm": 3.1919493675231934,
"learning_rate": 9.43820224719101e-06,
"loss": 0.8162,
"step": 48
},
{
"epoch": 0.1101123595505618,
"grad_norm": 2.8602521419525146,
"learning_rate": 9.634831460674157e-06,
"loss": 0.3545,
"step": 49
},
{
"epoch": 0.11235955056179775,
"grad_norm": 2.7570478916168213,
"learning_rate": 9.831460674157303e-06,
"loss": 0.315,
"step": 50
},
{
"epoch": 0.1146067415730337,
"grad_norm": 0.8641514778137207,
"learning_rate": 1.0028089887640448e-05,
"loss": 0.0627,
"step": 51
},
{
"epoch": 0.11685393258426967,
"grad_norm": 3.9437484741210938,
"learning_rate": 1.0224719101123595e-05,
"loss": 0.8851,
"step": 52
},
{
"epoch": 0.11910112359550562,
"grad_norm": 4.144773006439209,
"learning_rate": 1.042134831460674e-05,
"loss": 0.8382,
"step": 53
},
{
"epoch": 0.12134831460674157,
"grad_norm": 4.277736186981201,
"learning_rate": 1.0617977528089887e-05,
"loss": 0.733,
"step": 54
},
{
"epoch": 0.12359550561797752,
"grad_norm": 4.025904178619385,
"learning_rate": 1.0814606741573032e-05,
"loss": 0.7173,
"step": 55
},
{
"epoch": 0.1258426966292135,
"grad_norm": 3.923046827316284,
"learning_rate": 1.1011235955056178e-05,
"loss": 0.7659,
"step": 56
},
{
"epoch": 0.12808988764044943,
"grad_norm": 3.2707138061523438,
"learning_rate": 1.1207865168539325e-05,
"loss": 0.793,
"step": 57
},
{
"epoch": 0.1303370786516854,
"grad_norm": 3.1660959720611572,
"learning_rate": 1.1404494382022472e-05,
"loss": 0.5426,
"step": 58
},
{
"epoch": 0.13258426966292136,
"grad_norm": 4.5236663818359375,
"learning_rate": 1.1601123595505617e-05,
"loss": 0.7641,
"step": 59
},
{
"epoch": 0.1348314606741573,
"grad_norm": 0.5771021246910095,
"learning_rate": 1.1797752808988763e-05,
"loss": 0.0657,
"step": 60
},
{
"epoch": 0.13707865168539327,
"grad_norm": 3.8541343212127686,
"learning_rate": 1.1994382022471908e-05,
"loss": 0.7836,
"step": 61
},
{
"epoch": 0.1393258426966292,
"grad_norm": 4.284148693084717,
"learning_rate": 1.2191011235955055e-05,
"loss": 0.9306,
"step": 62
},
{
"epoch": 0.14157303370786517,
"grad_norm": 4.175032615661621,
"learning_rate": 1.23876404494382e-05,
"loss": 0.8673,
"step": 63
},
{
"epoch": 0.14382022471910114,
"grad_norm": 5.025452136993408,
"learning_rate": 1.2584269662921347e-05,
"loss": 0.9296,
"step": 64
},
{
"epoch": 0.14606741573033707,
"grad_norm": 3.970745086669922,
"learning_rate": 1.2780898876404493e-05,
"loss": 0.8211,
"step": 65
},
{
"epoch": 0.14831460674157304,
"grad_norm": 3.150197744369507,
"learning_rate": 1.297752808988764e-05,
"loss": 0.7685,
"step": 66
},
{
"epoch": 0.15056179775280898,
"grad_norm": 4.280994415283203,
"learning_rate": 1.3174157303370785e-05,
"loss": 0.7139,
"step": 67
},
{
"epoch": 0.15280898876404495,
"grad_norm": 4.288730621337891,
"learning_rate": 1.3370786516853932e-05,
"loss": 0.8241,
"step": 68
},
{
"epoch": 0.1550561797752809,
"grad_norm": 3.7402424812316895,
"learning_rate": 1.3567415730337077e-05,
"loss": 0.6256,
"step": 69
},
{
"epoch": 0.15730337078651685,
"grad_norm": 4.478890895843506,
"learning_rate": 1.3764044943820223e-05,
"loss": 0.8842,
"step": 70
},
{
"epoch": 0.15955056179775282,
"grad_norm": 3.8147876262664795,
"learning_rate": 1.3960674157303368e-05,
"loss": 0.804,
"step": 71
},
{
"epoch": 0.16179775280898875,
"grad_norm": 0.7314035296440125,
"learning_rate": 1.4157303370786515e-05,
"loss": 0.0989,
"step": 72
},
{
"epoch": 0.16404494382022472,
"grad_norm": 3.074303150177002,
"learning_rate": 1.4353932584269662e-05,
"loss": 0.332,
"step": 73
},
{
"epoch": 0.1662921348314607,
"grad_norm": 3.414987325668335,
"learning_rate": 1.4550561797752808e-05,
"loss": 0.5736,
"step": 74
},
{
"epoch": 0.16853932584269662,
"grad_norm": 3.7946674823760986,
"learning_rate": 1.4747191011235953e-05,
"loss": 0.8285,
"step": 75
},
{
"epoch": 0.1707865168539326,
"grad_norm": 4.310474395751953,
"learning_rate": 1.49438202247191e-05,
"loss": 0.9561,
"step": 76
},
{
"epoch": 0.17303370786516853,
"grad_norm": 0.9791378974914551,
"learning_rate": 1.5140449438202245e-05,
"loss": 0.0633,
"step": 77
},
{
"epoch": 0.1752808988764045,
"grad_norm": 0.6351795196533203,
"learning_rate": 1.5337078651685393e-05,
"loss": 0.0848,
"step": 78
},
{
"epoch": 0.17752808988764046,
"grad_norm": 3.4832303524017334,
"learning_rate": 1.553370786516854e-05,
"loss": 0.8325,
"step": 79
},
{
"epoch": 0.1797752808988764,
"grad_norm": 5.115800380706787,
"learning_rate": 1.5730337078651683e-05,
"loss": 1.0011,
"step": 80
},
{
"epoch": 0.18202247191011237,
"grad_norm": 3.552396297454834,
"learning_rate": 1.592696629213483e-05,
"loss": 0.8697,
"step": 81
},
{
"epoch": 0.1842696629213483,
"grad_norm": 4.491541862487793,
"learning_rate": 1.6123595505617977e-05,
"loss": 0.8344,
"step": 82
},
{
"epoch": 0.18651685393258427,
"grad_norm": 4.73278284072876,
"learning_rate": 1.6320224719101122e-05,
"loss": 0.9967,
"step": 83
},
{
"epoch": 0.18876404494382024,
"grad_norm": 2.994192123413086,
"learning_rate": 1.6516853932584267e-05,
"loss": 0.4638,
"step": 84
},
{
"epoch": 0.19101123595505617,
"grad_norm": 4.142394542694092,
"learning_rate": 1.6713483146067415e-05,
"loss": 0.8994,
"step": 85
},
{
"epoch": 0.19325842696629214,
"grad_norm": 4.149839401245117,
"learning_rate": 1.691011235955056e-05,
"loss": 0.7789,
"step": 86
},
{
"epoch": 0.19550561797752808,
"grad_norm": 0.45795938372612,
"learning_rate": 1.7106741573033705e-05,
"loss": 0.0555,
"step": 87
},
{
"epoch": 0.19775280898876405,
"grad_norm": 3.4293618202209473,
"learning_rate": 1.7303370786516853e-05,
"loss": 0.3778,
"step": 88
},
{
"epoch": 0.2,
"grad_norm": 4.041529655456543,
"learning_rate": 1.75e-05,
"loss": 0.708,
"step": 89
},
{
"epoch": 0.20224719101123595,
"grad_norm": 0.6160458922386169,
"learning_rate": 1.7696629213483143e-05,
"loss": 0.0689,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_VitaminC_cosine_accuracy": 0.556640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.74173903465271,
"eval_VitaminC_cosine_ap": 0.5513770735348443,
"eval_VitaminC_cosine_f1": 0.6675531914893617,
"eval_VitaminC_cosine_f1_threshold": 0.32480987906455994,
"eval_VitaminC_cosine_precision": 0.500998003992016,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.560546875,
"eval_VitaminC_dot_accuracy_threshold": 297.664794921875,
"eval_VitaminC_dot_ap": 0.5340088824099496,
"eval_VitaminC_dot_f1": 0.6666666666666667,
"eval_VitaminC_dot_f1_threshold": 126.67618560791016,
"eval_VitaminC_dot_precision": 0.501002004008016,
"eval_VitaminC_dot_recall": 0.9960159362549801,
"eval_VitaminC_euclidean_accuracy": 0.55859375,
"eval_VitaminC_euclidean_accuracy_threshold": 14.345688819885254,
"eval_VitaminC_euclidean_ap": 0.5542145004976253,
"eval_VitaminC_euclidean_f1": 0.6675531914893617,
"eval_VitaminC_euclidean_f1_threshold": 23.381019592285156,
"eval_VitaminC_euclidean_precision": 0.500998003992016,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.552734375,
"eval_VitaminC_manhattan_accuracy_threshold": 232.7296142578125,
"eval_VitaminC_manhattan_ap": 0.5523953693907266,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 496.4290466308594,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.560546875,
"eval_VitaminC_max_accuracy_threshold": 297.664794921875,
"eval_VitaminC_max_ap": 0.5542145004976253,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 496.4290466308594,
"eval_VitaminC_max_precision": 0.501002004008016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5542145004976253,
"eval_sts-test_pearson_cosine": 0.8800782580988616,
"eval_sts-test_pearson_dot": 0.8687642290872662,
"eval_sts-test_pearson_euclidean": 0.9034088230546415,
"eval_sts-test_pearson_manhattan": 0.9030146212284895,
"eval_sts-test_pearson_max": 0.9034088230546415,
"eval_sts-test_spearman_cosine": 0.904560289590133,
"eval_sts-test_spearman_dot": 0.8705944849554133,
"eval_sts-test_spearman_euclidean": 0.8998959103665689,
"eval_sts-test_spearman_manhattan": 0.8995891404697307,
"eval_sts-test_spearman_max": 0.904560289590133,
"eval_vitaminc-pairs_loss": 1.6141985654830933,
"eval_vitaminc-pairs_runtime": 1.864,
"eval_vitaminc-pairs_samples_per_second": 57.94,
"eval_vitaminc-pairs_steps_per_second": 1.073,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_negation-triplets_loss": 0.9220322370529175,
"eval_negation-triplets_runtime": 0.3199,
"eval_negation-triplets_samples_per_second": 200.043,
"eval_negation-triplets_steps_per_second": 3.126,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_scitail-pairs-pos_loss": 0.0654294565320015,
"eval_scitail-pairs-pos_runtime": 0.4625,
"eval_scitail-pairs-pos_samples_per_second": 116.76,
"eval_scitail-pairs-pos_steps_per_second": 2.162,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_scitail-pairs-qa_loss": 0.0015887805493548512,
"eval_scitail-pairs-qa_runtime": 0.5768,
"eval_scitail-pairs-qa_samples_per_second": 221.899,
"eval_scitail-pairs-qa_steps_per_second": 3.467,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_xsum-pairs_loss": 0.03991687670350075,
"eval_xsum-pairs_runtime": 2.7403,
"eval_xsum-pairs_samples_per_second": 46.71,
"eval_xsum-pairs_steps_per_second": 0.73,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_sciq_pairs_loss": 0.01584962010383606,
"eval_sciq_pairs_runtime": 2.8429,
"eval_sciq_pairs_samples_per_second": 45.024,
"eval_sciq_pairs_steps_per_second": 0.703,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_qasc_pairs_loss": 0.09112343192100525,
"eval_qasc_pairs_runtime": 0.6492,
"eval_qasc_pairs_samples_per_second": 197.154,
"eval_qasc_pairs_steps_per_second": 3.081,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_openbookqa_pairs_loss": 0.7132729887962341,
"eval_openbookqa_pairs_runtime": 0.5847,
"eval_openbookqa_pairs_samples_per_second": 218.922,
"eval_openbookqa_pairs_steps_per_second": 3.421,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_msmarco_pairs_loss": 0.15173853933811188,
"eval_msmarco_pairs_runtime": 1.4966,
"eval_msmarco_pairs_samples_per_second": 85.527,
"eval_msmarco_pairs_steps_per_second": 1.336,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_nq_pairs_loss": 0.09653442353010178,
"eval_nq_pairs_runtime": 2.3749,
"eval_nq_pairs_samples_per_second": 53.897,
"eval_nq_pairs_steps_per_second": 0.842,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_trivia_pairs_loss": 0.5191965699195862,
"eval_trivia_pairs_runtime": 3.6006,
"eval_trivia_pairs_samples_per_second": 35.55,
"eval_trivia_pairs_steps_per_second": 0.555,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_gooaq_pairs_loss": 0.30713126063346863,
"eval_gooaq_pairs_runtime": 0.9131,
"eval_gooaq_pairs_samples_per_second": 140.178,
"eval_gooaq_pairs_steps_per_second": 2.19,
"step": 90
},
{
"epoch": 0.20224719101123595,
"eval_paws-pos_loss": 0.024471310898661613,
"eval_paws-pos_runtime": 0.6872,
"eval_paws-pos_samples_per_second": 186.254,
"eval_paws-pos_steps_per_second": 2.91,
"step": 90
},
{
"epoch": 0.20449438202247192,
"grad_norm": 6.209661483764648,
"learning_rate": 1.7893258426966292e-05,
"loss": 2.3489,
"step": 91
},
{
"epoch": 0.20674157303370785,
"grad_norm": 3.1821141242980957,
"learning_rate": 1.8089887640449437e-05,
"loss": 0.741,
"step": 92
},
{
"epoch": 0.20898876404494382,
"grad_norm": 3.871994972229004,
"learning_rate": 1.8286516853932585e-05,
"loss": 0.7729,
"step": 93
},
{
"epoch": 0.21123595505617979,
"grad_norm": 0.5280765891075134,
"learning_rate": 1.8483146067415727e-05,
"loss": 0.0631,
"step": 94
},
{
"epoch": 0.21348314606741572,
"grad_norm": 4.475915431976318,
"learning_rate": 1.8679775280898875e-05,
"loss": 0.9342,
"step": 95
},
{
"epoch": 0.2157303370786517,
"grad_norm": 3.949381113052368,
"learning_rate": 1.887640449438202e-05,
"loss": 0.8581,
"step": 96
},
{
"epoch": 0.21797752808988763,
"grad_norm": 2.910426616668701,
"learning_rate": 1.907303370786517e-05,
"loss": 0.5198,
"step": 97
},
{
"epoch": 0.2202247191011236,
"grad_norm": 4.028941631317139,
"learning_rate": 1.9269662921348313e-05,
"loss": 0.846,
"step": 98
},
{
"epoch": 0.22247191011235956,
"grad_norm": 4.183433532714844,
"learning_rate": 1.946629213483146e-05,
"loss": 0.6581,
"step": 99
},
{
"epoch": 0.2247191011235955,
"grad_norm": 3.348114252090454,
"learning_rate": 1.9662921348314607e-05,
"loss": 0.3579,
"step": 100
},
{
"epoch": 0.22696629213483147,
"grad_norm": 4.055211544036865,
"learning_rate": 1.9859550561797752e-05,
"loss": 0.908,
"step": 101
},
{
"epoch": 0.2292134831460674,
"grad_norm": 1.0024710893630981,
"learning_rate": 2.0056179775280897e-05,
"loss": 0.0664,
"step": 102
},
{
"epoch": 0.23146067415730337,
"grad_norm": 3.582249641418457,
"learning_rate": 2.0252808988764042e-05,
"loss": 0.5411,
"step": 103
},
{
"epoch": 0.23370786516853934,
"grad_norm": 4.226349830627441,
"learning_rate": 2.044943820224719e-05,
"loss": 0.9163,
"step": 104
},
{
"epoch": 0.23595505617977527,
"grad_norm": 3.002727508544922,
"learning_rate": 2.0646067415730335e-05,
"loss": 0.7975,
"step": 105
},
{
"epoch": 0.23820224719101124,
"grad_norm": 3.5497515201568604,
"learning_rate": 2.084269662921348e-05,
"loss": 0.37,
"step": 106
},
{
"epoch": 0.24044943820224718,
"grad_norm": 4.381045341491699,
"learning_rate": 2.103932584269663e-05,
"loss": 0.8495,
"step": 107
},
{
"epoch": 0.24269662921348314,
"grad_norm": 3.926840305328369,
"learning_rate": 2.1235955056179773e-05,
"loss": 0.8073,
"step": 108
},
{
"epoch": 0.2449438202247191,
"grad_norm": 3.0835390090942383,
"learning_rate": 2.1432584269662922e-05,
"loss": 0.7563,
"step": 109
},
{
"epoch": 0.24719101123595505,
"grad_norm": 4.230669975280762,
"learning_rate": 2.1629213483146063e-05,
"loss": 0.6585,
"step": 110
},
{
"epoch": 0.24943820224719102,
"grad_norm": 2.8849070072174072,
"learning_rate": 2.1825842696629212e-05,
"loss": 0.3246,
"step": 111
},
{
"epoch": 0.251685393258427,
"grad_norm": 4.796951770782471,
"learning_rate": 2.2022471910112357e-05,
"loss": 0.9718,
"step": 112
},
{
"epoch": 0.2539325842696629,
"grad_norm": 4.60318660736084,
"learning_rate": 2.2219101123595505e-05,
"loss": 0.8584,
"step": 113
},
{
"epoch": 0.25617977528089886,
"grad_norm": 3.098703384399414,
"learning_rate": 2.241573033707865e-05,
"loss": 0.3385,
"step": 114
},
{
"epoch": 0.25842696629213485,
"grad_norm": 2.9519224166870117,
"learning_rate": 2.2612359550561795e-05,
"loss": 0.323,
"step": 115
},
{
"epoch": 0.2606741573033708,
"grad_norm": 2.913742780685425,
"learning_rate": 2.2808988764044944e-05,
"loss": 0.3359,
"step": 116
},
{
"epoch": 0.26292134831460673,
"grad_norm": 4.148440837860107,
"learning_rate": 2.300561797752809e-05,
"loss": 0.6955,
"step": 117
},
{
"epoch": 0.2651685393258427,
"grad_norm": 0.8463248610496521,
"learning_rate": 2.3202247191011234e-05,
"loss": 0.0539,
"step": 118
},
{
"epoch": 0.26741573033707866,
"grad_norm": 0.7284589409828186,
"learning_rate": 2.339887640449438e-05,
"loss": 0.0507,
"step": 119
},
{
"epoch": 0.2696629213483146,
"grad_norm": 3.615086317062378,
"learning_rate": 2.3595505617977527e-05,
"loss": 0.314,
"step": 120
},
{
"epoch": 0.27191011235955054,
"grad_norm": 5.229820728302002,
"learning_rate": 2.3792134831460672e-05,
"loss": 1.0339,
"step": 121
},
{
"epoch": 0.27415730337078653,
"grad_norm": 3.6847782135009766,
"learning_rate": 2.3988764044943817e-05,
"loss": 0.3158,
"step": 122
},
{
"epoch": 0.27640449438202247,
"grad_norm": 4.280517578125,
"learning_rate": 2.4185393258426965e-05,
"loss": 0.7809,
"step": 123
},
{
"epoch": 0.2786516853932584,
"grad_norm": 4.476150035858154,
"learning_rate": 2.438202247191011e-05,
"loss": 0.9516,
"step": 124
},
{
"epoch": 0.2808988764044944,
"grad_norm": 2.7380239963531494,
"learning_rate": 2.457865168539326e-05,
"loss": 0.3117,
"step": 125
},
{
"epoch": 0.28314606741573034,
"grad_norm": 3.9667162895202637,
"learning_rate": 2.47752808988764e-05,
"loss": 0.8366,
"step": 126
},
{
"epoch": 0.2853932584269663,
"grad_norm": 4.552999019622803,
"learning_rate": 2.497191011235955e-05,
"loss": 0.8033,
"step": 127
},
{
"epoch": 0.2876404494382023,
"grad_norm": 3.4238576889038086,
"learning_rate": 2.5168539325842694e-05,
"loss": 0.7253,
"step": 128
},
{
"epoch": 0.2898876404494382,
"grad_norm": 4.677807331085205,
"learning_rate": 2.5365168539325842e-05,
"loss": 0.8345,
"step": 129
},
{
"epoch": 0.29213483146067415,
"grad_norm": 4.282113075256348,
"learning_rate": 2.5561797752808987e-05,
"loss": 0.7532,
"step": 130
},
{
"epoch": 0.2943820224719101,
"grad_norm": 4.375221252441406,
"learning_rate": 2.5758426966292132e-05,
"loss": 0.8247,
"step": 131
},
{
"epoch": 0.2966292134831461,
"grad_norm": 3.2591633796691895,
"learning_rate": 2.595505617977528e-05,
"loss": 0.5175,
"step": 132
},
{
"epoch": 0.298876404494382,
"grad_norm": 4.146636962890625,
"learning_rate": 2.6151685393258425e-05,
"loss": 0.7813,
"step": 133
},
{
"epoch": 0.30112359550561796,
"grad_norm": 4.2413249015808105,
"learning_rate": 2.634831460674157e-05,
"loss": 0.6582,
"step": 134
},
{
"epoch": 0.30337078651685395,
"grad_norm": 4.541455268859863,
"learning_rate": 2.6544943820224715e-05,
"loss": 0.3484,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_VitaminC_cosine_accuracy": 0.560546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.7956135272979736,
"eval_VitaminC_cosine_ap": 0.5505565383154402,
"eval_VitaminC_cosine_f1": 0.6684709066305818,
"eval_VitaminC_cosine_f1_threshold": 0.40466147661209106,
"eval_VitaminC_cosine_precision": 0.5061475409836066,
"eval_VitaminC_cosine_recall": 0.9840637450199203,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 312.2774658203125,
"eval_VitaminC_dot_ap": 0.5365135091766033,
"eval_VitaminC_dot_f1": 0.6684856753069577,
"eval_VitaminC_dot_f1_threshold": 157.33203125,
"eval_VitaminC_dot_precision": 0.508298755186722,
"eval_VitaminC_dot_recall": 0.9760956175298805,
"eval_VitaminC_euclidean_accuracy": 0.5546875,
"eval_VitaminC_euclidean_accuracy_threshold": 12.184114456176758,
"eval_VitaminC_euclidean_ap": 0.5517706579195627,
"eval_VitaminC_euclidean_f1": 0.6649006622516557,
"eval_VitaminC_euclidean_f1_threshold": 23.68879508972168,
"eval_VitaminC_euclidean_precision": 0.498015873015873,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 241.22061157226562,
"eval_VitaminC_manhattan_ap": 0.5494156168773414,
"eval_VitaminC_manhattan_f1": 0.6649006622516557,
"eval_VitaminC_manhattan_f1_threshold": 510.2530212402344,
"eval_VitaminC_manhattan_precision": 0.498015873015873,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.560546875,
"eval_VitaminC_max_accuracy_threshold": 312.2774658203125,
"eval_VitaminC_max_ap": 0.5517706579195627,
"eval_VitaminC_max_f1": 0.6684856753069577,
"eval_VitaminC_max_f1_threshold": 510.2530212402344,
"eval_VitaminC_max_precision": 0.508298755186722,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5517706579195627,
"eval_sts-test_pearson_cosine": 0.8812438499723412,
"eval_sts-test_pearson_dot": 0.8695651753004092,
"eval_sts-test_pearson_euclidean": 0.9036940037118162,
"eval_sts-test_pearson_manhattan": 0.9035516699922166,
"eval_sts-test_pearson_max": 0.9036940037118162,
"eval_sts-test_spearman_cosine": 0.9049742835092648,
"eval_sts-test_spearman_dot": 0.8707925987895928,
"eval_sts-test_spearman_euclidean": 0.9003956924537878,
"eval_sts-test_spearman_manhattan": 0.9002747745455083,
"eval_sts-test_spearman_max": 0.9049742835092648,
"eval_vitaminc-pairs_loss": 1.5520410537719727,
"eval_vitaminc-pairs_runtime": 1.8323,
"eval_vitaminc-pairs_samples_per_second": 58.943,
"eval_vitaminc-pairs_steps_per_second": 1.092,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_negation-triplets_loss": 0.9211694002151489,
"eval_negation-triplets_runtime": 0.2923,
"eval_negation-triplets_samples_per_second": 218.93,
"eval_negation-triplets_steps_per_second": 3.421,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_scitail-pairs-pos_loss": 0.07377135753631592,
"eval_scitail-pairs-pos_runtime": 0.3681,
"eval_scitail-pairs-pos_samples_per_second": 146.691,
"eval_scitail-pairs-pos_steps_per_second": 2.716,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_scitail-pairs-qa_loss": 0.00150959100574255,
"eval_scitail-pairs-qa_runtime": 0.5123,
"eval_scitail-pairs-qa_samples_per_second": 249.842,
"eval_scitail-pairs-qa_steps_per_second": 3.904,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_xsum-pairs_loss": 0.036599572747945786,
"eval_xsum-pairs_runtime": 2.7238,
"eval_xsum-pairs_samples_per_second": 46.994,
"eval_xsum-pairs_steps_per_second": 0.734,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_sciq_pairs_loss": 0.01615014858543873,
"eval_sciq_pairs_runtime": 2.8064,
"eval_sciq_pairs_samples_per_second": 45.61,
"eval_sciq_pairs_steps_per_second": 0.713,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_qasc_pairs_loss": 0.09235507994890213,
"eval_qasc_pairs_runtime": 0.6488,
"eval_qasc_pairs_samples_per_second": 197.276,
"eval_qasc_pairs_steps_per_second": 3.082,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_openbookqa_pairs_loss": 0.6891775727272034,
"eval_openbookqa_pairs_runtime": 0.5698,
"eval_openbookqa_pairs_samples_per_second": 224.641,
"eval_openbookqa_pairs_steps_per_second": 3.51,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_msmarco_pairs_loss": 0.16766037046909332,
"eval_msmarco_pairs_runtime": 1.4798,
"eval_msmarco_pairs_samples_per_second": 86.499,
"eval_msmarco_pairs_steps_per_second": 1.352,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_nq_pairs_loss": 0.09737721085548401,
"eval_nq_pairs_runtime": 2.3409,
"eval_nq_pairs_samples_per_second": 54.68,
"eval_nq_pairs_steps_per_second": 0.854,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_trivia_pairs_loss": 0.5458433032035828,
"eval_trivia_pairs_runtime": 3.5771,
"eval_trivia_pairs_samples_per_second": 35.783,
"eval_trivia_pairs_steps_per_second": 0.559,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_gooaq_pairs_loss": 0.3082329332828522,
"eval_gooaq_pairs_runtime": 0.9181,
"eval_gooaq_pairs_samples_per_second": 139.413,
"eval_gooaq_pairs_steps_per_second": 2.178,
"step": 135
},
{
"epoch": 0.30337078651685395,
"eval_paws-pos_loss": 0.02423396334052086,
"eval_paws-pos_runtime": 0.6827,
"eval_paws-pos_samples_per_second": 187.501,
"eval_paws-pos_steps_per_second": 2.93,
"step": 135
},
{
"epoch": 0.3056179775280899,
"grad_norm": 4.549901485443115,
"learning_rate": 2.6741573033707864e-05,
"loss": 0.7648,
"step": 136
},
{
"epoch": 0.30786516853932583,
"grad_norm": 3.225851535797119,
"learning_rate": 2.693820224719101e-05,
"loss": 0.7554,
"step": 137
},
{
"epoch": 0.3101123595505618,
"grad_norm": 0.6228423118591309,
"learning_rate": 2.7134831460674154e-05,
"loss": 0.0753,
"step": 138
},
{
"epoch": 0.31235955056179776,
"grad_norm": 3.12802791595459,
"learning_rate": 2.7331460674157302e-05,
"loss": 0.4987,
"step": 139
},
{
"epoch": 0.3146067415730337,
"grad_norm": 4.1997880935668945,
"learning_rate": 2.7528089887640447e-05,
"loss": 0.8543,
"step": 140
},
{
"epoch": 0.31685393258426964,
"grad_norm": 4.3362860679626465,
"learning_rate": 2.7724719101123595e-05,
"loss": 0.9425,
"step": 141
},
{
"epoch": 0.31910112359550563,
"grad_norm": 0.5599316954612732,
"learning_rate": 2.7921348314606737e-05,
"loss": 0.0472,
"step": 142
},
{
"epoch": 0.32134831460674157,
"grad_norm": 3.503603458404541,
"learning_rate": 2.8117977528089885e-05,
"loss": 0.848,
"step": 143
},
{
"epoch": 0.3235955056179775,
"grad_norm": 4.712310314178467,
"learning_rate": 2.831460674157303e-05,
"loss": 0.8946,
"step": 144
},
{
"epoch": 0.3258426966292135,
"grad_norm": 3.1823527812957764,
"learning_rate": 2.851123595505618e-05,
"loss": 0.7841,
"step": 145
},
{
"epoch": 0.32808988764044944,
"grad_norm": 4.423196315765381,
"learning_rate": 2.8707865168539324e-05,
"loss": 0.6653,
"step": 146
},
{
"epoch": 0.3303370786516854,
"grad_norm": 4.137822151184082,
"learning_rate": 2.890449438202247e-05,
"loss": 0.3522,
"step": 147
},
{
"epoch": 0.3325842696629214,
"grad_norm": 2.997777223587036,
"learning_rate": 2.9101123595505617e-05,
"loss": 0.4853,
"step": 148
},
{
"epoch": 0.3348314606741573,
"grad_norm": 2.89650559425354,
"learning_rate": 2.9297752808988762e-05,
"loss": 0.4726,
"step": 149
},
{
"epoch": 0.33707865168539325,
"grad_norm": 5.486624717712402,
"learning_rate": 2.9494382022471907e-05,
"loss": 0.8693,
"step": 150
},
{
"epoch": 0.3393258426966292,
"grad_norm": 4.800889015197754,
"learning_rate": 2.9691011235955052e-05,
"loss": 0.8124,
"step": 151
},
{
"epoch": 0.3415730337078652,
"grad_norm": 4.188066005706787,
"learning_rate": 2.98876404494382e-05,
"loss": 0.8206,
"step": 152
},
{
"epoch": 0.3438202247191011,
"grad_norm": 4.340461254119873,
"learning_rate": 3.0084269662921345e-05,
"loss": 0.9406,
"step": 153
},
{
"epoch": 0.34606741573033706,
"grad_norm": 4.658304214477539,
"learning_rate": 3.028089887640449e-05,
"loss": 0.7944,
"step": 154
},
{
"epoch": 0.34831460674157305,
"grad_norm": 0.6266987919807434,
"learning_rate": 3.047752808988764e-05,
"loss": 0.0766,
"step": 155
},
{
"epoch": 0.350561797752809,
"grad_norm": 4.252346515655518,
"learning_rate": 3.067415730337079e-05,
"loss": 0.8609,
"step": 156
},
{
"epoch": 0.35280898876404493,
"grad_norm": 4.9649658203125,
"learning_rate": 3.087078651685393e-05,
"loss": 1.0533,
"step": 157
},
{
"epoch": 0.3550561797752809,
"grad_norm": 4.485607624053955,
"learning_rate": 3.106741573033708e-05,
"loss": 0.8396,
"step": 158
},
{
"epoch": 0.35730337078651686,
"grad_norm": 3.241231918334961,
"learning_rate": 3.126404494382022e-05,
"loss": 0.7865,
"step": 159
},
{
"epoch": 0.3595505617977528,
"grad_norm": 6.846582889556885,
"learning_rate": 3.146067415730337e-05,
"loss": 2.4616,
"step": 160
},
{
"epoch": 0.36179775280898874,
"grad_norm": 0.5514687895774841,
"learning_rate": 3.165730337078651e-05,
"loss": 0.0556,
"step": 161
},
{
"epoch": 0.36404494382022473,
"grad_norm": 3.7877562046051025,
"learning_rate": 3.185393258426966e-05,
"loss": 0.3758,
"step": 162
},
{
"epoch": 0.36629213483146067,
"grad_norm": 5.397939682006836,
"learning_rate": 3.205056179775281e-05,
"loss": 0.9312,
"step": 163
},
{
"epoch": 0.3685393258426966,
"grad_norm": 4.301459312438965,
"learning_rate": 3.2247191011235954e-05,
"loss": 0.7993,
"step": 164
},
{
"epoch": 0.3707865168539326,
"grad_norm": 4.49428129196167,
"learning_rate": 3.24438202247191e-05,
"loss": 0.8104,
"step": 165
},
{
"epoch": 0.37303370786516854,
"grad_norm": 3.2210912704467773,
"learning_rate": 3.2640449438202244e-05,
"loss": 0.8199,
"step": 166
},
{
"epoch": 0.3752808988764045,
"grad_norm": 5.359859466552734,
"learning_rate": 3.283707865168539e-05,
"loss": 1.0724,
"step": 167
},
{
"epoch": 0.3775280898876405,
"grad_norm": 4.00059700012207,
"learning_rate": 3.3033707865168534e-05,
"loss": 0.3521,
"step": 168
},
{
"epoch": 0.3797752808988764,
"grad_norm": 4.418768882751465,
"learning_rate": 3.3230337078651685e-05,
"loss": 0.8536,
"step": 169
},
{
"epoch": 0.38202247191011235,
"grad_norm": 4.15454626083374,
"learning_rate": 3.342696629213483e-05,
"loss": 0.872,
"step": 170
},
{
"epoch": 0.3842696629213483,
"grad_norm": 3.8060054779052734,
"learning_rate": 3.3623595505617975e-05,
"loss": 0.8009,
"step": 171
},
{
"epoch": 0.3865168539325843,
"grad_norm": 3.584745407104492,
"learning_rate": 3.382022471910112e-05,
"loss": 0.7798,
"step": 172
},
{
"epoch": 0.3887640449438202,
"grad_norm": 4.861410140991211,
"learning_rate": 3.4016853932584265e-05,
"loss": 0.5953,
"step": 173
},
{
"epoch": 0.39101123595505616,
"grad_norm": 3.983793020248413,
"learning_rate": 3.421348314606741e-05,
"loss": 0.7562,
"step": 174
},
{
"epoch": 0.39325842696629215,
"grad_norm": 4.841738224029541,
"learning_rate": 3.4410112359550555e-05,
"loss": 0.7227,
"step": 175
},
{
"epoch": 0.3955056179775281,
"grad_norm": 4.787370204925537,
"learning_rate": 3.460674157303371e-05,
"loss": 0.8953,
"step": 176
},
{
"epoch": 0.39775280898876403,
"grad_norm": 4.337812900543213,
"learning_rate": 3.480337078651685e-05,
"loss": 0.7102,
"step": 177
},
{
"epoch": 0.4,
"grad_norm": 0.9599294662475586,
"learning_rate": 3.5e-05,
"loss": 0.0667,
"step": 178
},
{
"epoch": 0.40224719101123596,
"grad_norm": 0.6864398717880249,
"learning_rate": 3.4999863718440846e-05,
"loss": 0.0528,
"step": 179
},
{
"epoch": 0.4044943820224719,
"grad_norm": 4.738316059112549,
"learning_rate": 3.499945487641664e-05,
"loss": 0.7312,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_VitaminC_cosine_accuracy": 0.556640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.8256886005401611,
"eval_VitaminC_cosine_ap": 0.5557251062538118,
"eval_VitaminC_cosine_f1": 0.6666666666666667,
"eval_VitaminC_cosine_f1_threshold": 0.4391498863697052,
"eval_VitaminC_cosine_precision": 0.5051334702258727,
"eval_VitaminC_cosine_recall": 0.9800796812749004,
"eval_VitaminC_dot_accuracy": 0.556640625,
"eval_VitaminC_dot_accuracy_threshold": 314.2790832519531,
"eval_VitaminC_dot_ap": 0.5397120960874565,
"eval_VitaminC_dot_f1": 0.6684636118598383,
"eval_VitaminC_dot_f1_threshold": 144.02464294433594,
"eval_VitaminC_dot_precision": 0.505091649694501,
"eval_VitaminC_dot_recall": 0.9880478087649402,
"eval_VitaminC_euclidean_accuracy": 0.560546875,
"eval_VitaminC_euclidean_accuracy_threshold": 13.859346389770508,
"eval_VitaminC_euclidean_ap": 0.5582755831276058,
"eval_VitaminC_euclidean_f1": 0.667605633802817,
"eval_VitaminC_euclidean_f1_threshold": 18.874879837036133,
"eval_VitaminC_euclidean_precision": 0.5163398692810458,
"eval_VitaminC_euclidean_recall": 0.9442231075697212,
"eval_VitaminC_manhattan_accuracy": 0.560546875,
"eval_VitaminC_manhattan_accuracy_threshold": 239.6153564453125,
"eval_VitaminC_manhattan_ap": 0.5569115785564898,
"eval_VitaminC_manhattan_f1": 0.6649006622516557,
"eval_VitaminC_manhattan_f1_threshold": 501.158447265625,
"eval_VitaminC_manhattan_precision": 0.498015873015873,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.560546875,
"eval_VitaminC_max_accuracy_threshold": 314.2790832519531,
"eval_VitaminC_max_ap": 0.5582755831276058,
"eval_VitaminC_max_f1": 0.6684636118598383,
"eval_VitaminC_max_f1_threshold": 501.158447265625,
"eval_VitaminC_max_precision": 0.5163398692810458,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5582755831276058,
"eval_sts-test_pearson_cosine": 0.8825432226222443,
"eval_sts-test_pearson_dot": 0.8720125241659442,
"eval_sts-test_pearson_euclidean": 0.9053801707227738,
"eval_sts-test_pearson_manhattan": 0.9060044572091359,
"eval_sts-test_pearson_max": 0.9060044572091359,
"eval_sts-test_spearman_cosine": 0.9055030196626042,
"eval_sts-test_spearman_dot": 0.8729395405548455,
"eval_sts-test_spearman_euclidean": 0.9013990604854444,
"eval_sts-test_spearman_manhattan": 0.9021052353902007,
"eval_sts-test_spearman_max": 0.9055030196626042,
"eval_vitaminc-pairs_loss": 1.5215541124343872,
"eval_vitaminc-pairs_runtime": 1.8745,
"eval_vitaminc-pairs_samples_per_second": 57.614,
"eval_vitaminc-pairs_steps_per_second": 1.067,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_negation-triplets_loss": 0.9813100099563599,
"eval_negation-triplets_runtime": 0.3009,
"eval_negation-triplets_samples_per_second": 212.73,
"eval_negation-triplets_steps_per_second": 3.324,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_scitail-pairs-pos_loss": 0.09161412715911865,
"eval_scitail-pairs-pos_runtime": 0.3936,
"eval_scitail-pairs-pos_samples_per_second": 137.188,
"eval_scitail-pairs-pos_steps_per_second": 2.541,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_scitail-pairs-qa_loss": 0.0013133077882230282,
"eval_scitail-pairs-qa_runtime": 0.5286,
"eval_scitail-pairs-qa_samples_per_second": 242.147,
"eval_scitail-pairs-qa_steps_per_second": 3.784,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_xsum-pairs_loss": 0.049595557153224945,
"eval_xsum-pairs_runtime": 2.7447,
"eval_xsum-pairs_samples_per_second": 46.636,
"eval_xsum-pairs_steps_per_second": 0.729,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_sciq_pairs_loss": 0.017273178324103355,
"eval_sciq_pairs_runtime": 2.8401,
"eval_sciq_pairs_samples_per_second": 45.069,
"eval_sciq_pairs_steps_per_second": 0.704,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_qasc_pairs_loss": 0.09485691040754318,
"eval_qasc_pairs_runtime": 0.6594,
"eval_qasc_pairs_samples_per_second": 194.113,
"eval_qasc_pairs_steps_per_second": 3.033,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_openbookqa_pairs_loss": 0.7253161072731018,
"eval_openbookqa_pairs_runtime": 0.5801,
"eval_openbookqa_pairs_samples_per_second": 220.633,
"eval_openbookqa_pairs_steps_per_second": 3.447,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_msmarco_pairs_loss": 0.17383378744125366,
"eval_msmarco_pairs_runtime": 1.4824,
"eval_msmarco_pairs_samples_per_second": 86.346,
"eval_msmarco_pairs_steps_per_second": 1.349,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_nq_pairs_loss": 0.10324681550264359,
"eval_nq_pairs_runtime": 2.3542,
"eval_nq_pairs_samples_per_second": 54.372,
"eval_nq_pairs_steps_per_second": 0.85,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_trivia_pairs_loss": 0.5358972549438477,
"eval_trivia_pairs_runtime": 3.5881,
"eval_trivia_pairs_samples_per_second": 35.673,
"eval_trivia_pairs_steps_per_second": 0.557,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_gooaq_pairs_loss": 0.3070329427719116,
"eval_gooaq_pairs_runtime": 0.9009,
"eval_gooaq_pairs_samples_per_second": 142.079,
"eval_gooaq_pairs_steps_per_second": 2.22,
"step": 180
},
{
"epoch": 0.4044943820224719,
"eval_paws-pos_loss": 0.024055125191807747,
"eval_paws-pos_runtime": 0.6792,
"eval_paws-pos_samples_per_second": 188.469,
"eval_paws-pos_steps_per_second": 2.945,
"step": 180
},
{
"epoch": 0.4067415730337079,
"grad_norm": 5.063413143157959,
"learning_rate": 3.4998773481887046e-05,
"loss": 0.7809,
"step": 181
},
{
"epoch": 0.40898876404494383,
"grad_norm": 4.108719825744629,
"learning_rate": 3.499781954811798e-05,
"loss": 0.8333,
"step": 182
},
{
"epoch": 0.41123595505617977,
"grad_norm": 4.6362104415893555,
"learning_rate": 3.499659309368139e-05,
"loss": 0.9283,
"step": 183
},
{
"epoch": 0.4134831460674157,
"grad_norm": 4.432968616485596,
"learning_rate": 3.499509414245486e-05,
"loss": 0.7011,
"step": 184
},
{
"epoch": 0.4157303370786517,
"grad_norm": 4.040768623352051,
"learning_rate": 3.4993322723621164e-05,
"loss": 0.8413,
"step": 185
},
{
"epoch": 0.41797752808988764,
"grad_norm": 5.797406196594238,
"learning_rate": 3.499127887166769e-05,
"loss": 1.1679,
"step": 186
},
{
"epoch": 0.4202247191011236,
"grad_norm": 4.275143623352051,
"learning_rate": 3.498896262638578e-05,
"loss": 0.8701,
"step": 187
},
{
"epoch": 0.42247191011235957,
"grad_norm": 3.920672655105591,
"learning_rate": 3.498637403286993e-05,
"loss": 0.8139,
"step": 188
},
{
"epoch": 0.4247191011235955,
"grad_norm": 4.049210071563721,
"learning_rate": 3.498351314151693e-05,
"loss": 0.664,
"step": 189
},
{
"epoch": 0.42696629213483145,
"grad_norm": 4.007586479187012,
"learning_rate": 3.498038000802489e-05,
"loss": 0.3835,
"step": 190
},
{
"epoch": 0.42921348314606744,
"grad_norm": 3.7303507328033447,
"learning_rate": 3.497697469339215e-05,
"loss": 0.8516,
"step": 191
},
{
"epoch": 0.4314606741573034,
"grad_norm": 2.96820330619812,
"learning_rate": 3.497329726391606e-05,
"loss": 0.5479,
"step": 192
},
{
"epoch": 0.4337078651685393,
"grad_norm": 5.242271423339844,
"learning_rate": 3.496934779119175e-05,
"loss": 0.8642,
"step": 193
},
{
"epoch": 0.43595505617977526,
"grad_norm": 2.740006685256958,
"learning_rate": 3.496512635211069e-05,
"loss": 0.3121,
"step": 194
},
{
"epoch": 0.43820224719101125,
"grad_norm": 4.162242889404297,
"learning_rate": 3.496063302885921e-05,
"loss": 0.6932,
"step": 195
},
{
"epoch": 0.4404494382022472,
"grad_norm": 0.632938027381897,
"learning_rate": 3.495586790891689e-05,
"loss": 0.0647,
"step": 196
},
{
"epoch": 0.44269662921348313,
"grad_norm": 4.595058917999268,
"learning_rate": 3.495083108505487e-05,
"loss": 0.8173,
"step": 197
},
{
"epoch": 0.4449438202247191,
"grad_norm": 3.102372646331787,
"learning_rate": 3.494552265533404e-05,
"loss": 0.3122,
"step": 198
},
{
"epoch": 0.44719101123595506,
"grad_norm": 4.9895830154418945,
"learning_rate": 3.493994272310313e-05,
"loss": 0.7852,
"step": 199
},
{
"epoch": 0.449438202247191,
"grad_norm": 4.032258987426758,
"learning_rate": 3.493409139699669e-05,
"loss": 0.811,
"step": 200
},
{
"epoch": 0.451685393258427,
"grad_norm": 4.17324161529541,
"learning_rate": 3.4927968790932973e-05,
"loss": 0.7564,
"step": 201
},
{
"epoch": 0.45393258426966293,
"grad_norm": 0.49707159399986267,
"learning_rate": 3.492157502411174e-05,
"loss": 0.0541,
"step": 202
},
{
"epoch": 0.45617977528089887,
"grad_norm": 3.847059965133667,
"learning_rate": 3.491491022101194e-05,
"loss": 0.9085,
"step": 203
},
{
"epoch": 0.4584269662921348,
"grad_norm": 4.565647602081299,
"learning_rate": 3.4907974511389224e-05,
"loss": 0.8416,
"step": 204
},
{
"epoch": 0.4606741573033708,
"grad_norm": 0.8872150778770447,
"learning_rate": 3.4900768030273515e-05,
"loss": 0.0569,
"step": 205
},
{
"epoch": 0.46292134831460674,
"grad_norm": 3.2797999382019043,
"learning_rate": 3.4893290917966305e-05,
"loss": 0.7998,
"step": 206
},
{
"epoch": 0.4651685393258427,
"grad_norm": 5.683195114135742,
"learning_rate": 3.4885543320037956e-05,
"loss": 0.7218,
"step": 207
},
{
"epoch": 0.46741573033707867,
"grad_norm": 5.348382949829102,
"learning_rate": 3.4877525387324844e-05,
"loss": 0.9292,
"step": 208
},
{
"epoch": 0.4696629213483146,
"grad_norm": 4.3047099113464355,
"learning_rate": 3.486923727592647e-05,
"loss": 0.8279,
"step": 209
},
{
"epoch": 0.47191011235955055,
"grad_norm": 4.425166130065918,
"learning_rate": 3.486067914720236e-05,
"loss": 0.8452,
"step": 210
},
{
"epoch": 0.47415730337078654,
"grad_norm": 5.7947916984558105,
"learning_rate": 3.485185116776896e-05,
"loss": 1.1099,
"step": 211
},
{
"epoch": 0.4764044943820225,
"grad_norm": 4.257087230682373,
"learning_rate": 3.4842753509496385e-05,
"loss": 0.9436,
"step": 212
},
{
"epoch": 0.4786516853932584,
"grad_norm": 4.357375144958496,
"learning_rate": 3.483338634950507e-05,
"loss": 0.8389,
"step": 213
},
{
"epoch": 0.48089887640449436,
"grad_norm": 3.666268825531006,
"learning_rate": 3.482374987016233e-05,
"loss": 0.3297,
"step": 214
},
{
"epoch": 0.48314606741573035,
"grad_norm": 3.0593607425689697,
"learning_rate": 3.481384425907879e-05,
"loss": 0.8098,
"step": 215
},
{
"epoch": 0.4853932584269663,
"grad_norm": 0.4539957344532013,
"learning_rate": 3.480366970910476e-05,
"loss": 0.0386,
"step": 216
},
{
"epoch": 0.48764044943820223,
"grad_norm": 3.3102784156799316,
"learning_rate": 3.479322641832646e-05,
"loss": 0.7752,
"step": 217
},
{
"epoch": 0.4898876404494382,
"grad_norm": 3.8798298835754395,
"learning_rate": 3.4782514590062165e-05,
"loss": 0.8071,
"step": 218
},
{
"epoch": 0.49213483146067416,
"grad_norm": 6.300197124481201,
"learning_rate": 3.4771534432858255e-05,
"loss": 2.571,
"step": 219
},
{
"epoch": 0.4943820224719101,
"grad_norm": 4.163381099700928,
"learning_rate": 3.4760286160485145e-05,
"loss": 0.5912,
"step": 220
},
{
"epoch": 0.4966292134831461,
"grad_norm": 3.5834686756134033,
"learning_rate": 3.474876999193314e-05,
"loss": 0.3792,
"step": 221
},
{
"epoch": 0.49887640449438203,
"grad_norm": 4.494593143463135,
"learning_rate": 3.473698615140816e-05,
"loss": 0.7456,
"step": 222
},
{
"epoch": 0.501123595505618,
"grad_norm": 3.909142017364502,
"learning_rate": 3.4724934868327366e-05,
"loss": 0.7207,
"step": 223
},
{
"epoch": 0.503370786516854,
"grad_norm": 3.0387282371520996,
"learning_rate": 3.47126163773147e-05,
"loss": 0.3254,
"step": 224
},
{
"epoch": 0.5056179775280899,
"grad_norm": 0.6529088616371155,
"learning_rate": 3.4700030918196344e-05,
"loss": 0.0461,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_VitaminC_cosine_accuracy": 0.556640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.8303268551826477,
"eval_VitaminC_cosine_ap": 0.5509523400010791,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.2634955048561096,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 281.011474609375,
"eval_VitaminC_dot_ap": 0.5281394234221073,
"eval_VitaminC_dot_f1": 0.6711772665764546,
"eval_VitaminC_dot_f1_threshold": 141.11529541015625,
"eval_VitaminC_dot_precision": 0.5081967213114754,
"eval_VitaminC_dot_recall": 0.9880478087649402,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 11.405111312866211,
"eval_VitaminC_euclidean_ap": 0.5573376843815556,
"eval_VitaminC_euclidean_f1": 0.6640211640211641,
"eval_VitaminC_euclidean_f1_threshold": 24.63976287841797,
"eval_VitaminC_euclidean_precision": 0.497029702970297,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.556640625,
"eval_VitaminC_manhattan_accuracy_threshold": 349.33441162109375,
"eval_VitaminC_manhattan_ap": 0.5561637270496671,
"eval_VitaminC_manhattan_f1": 0.6657824933687002,
"eval_VitaminC_manhattan_f1_threshold": 505.0340270996094,
"eval_VitaminC_manhattan_precision": 0.4990059642147117,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.556640625,
"eval_VitaminC_max_accuracy_threshold": 349.33441162109375,
"eval_VitaminC_max_ap": 0.5573376843815556,
"eval_VitaminC_max_f1": 0.6711772665764546,
"eval_VitaminC_max_f1_threshold": 505.0340270996094,
"eval_VitaminC_max_precision": 0.5081967213114754,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5573376843815556,
"eval_sts-test_pearson_cosine": 0.8848200869109313,
"eval_sts-test_pearson_dot": 0.8723563516714744,
"eval_sts-test_pearson_euclidean": 0.9070688973489409,
"eval_sts-test_pearson_manhattan": 0.9073961699007848,
"eval_sts-test_pearson_max": 0.9073961699007848,
"eval_sts-test_spearman_cosine": 0.9050875937031079,
"eval_sts-test_spearman_dot": 0.8699468894518183,
"eval_sts-test_spearman_euclidean": 0.9020747597811932,
"eval_sts-test_spearman_manhattan": 0.9019608230696907,
"eval_sts-test_spearman_max": 0.9050875937031079,
"eval_vitaminc-pairs_loss": 1.4897230863571167,
"eval_vitaminc-pairs_runtime": 1.8927,
"eval_vitaminc-pairs_samples_per_second": 57.062,
"eval_vitaminc-pairs_steps_per_second": 1.057,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_negation-triplets_loss": 0.9457363486289978,
"eval_negation-triplets_runtime": 0.3019,
"eval_negation-triplets_samples_per_second": 212.002,
"eval_negation-triplets_steps_per_second": 3.313,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_scitail-pairs-pos_loss": 0.07606112211942673,
"eval_scitail-pairs-pos_runtime": 0.3972,
"eval_scitail-pairs-pos_samples_per_second": 135.938,
"eval_scitail-pairs-pos_steps_per_second": 2.517,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_scitail-pairs-qa_loss": 0.001212431932799518,
"eval_scitail-pairs-qa_runtime": 0.5348,
"eval_scitail-pairs-qa_samples_per_second": 239.347,
"eval_scitail-pairs-qa_steps_per_second": 3.74,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_xsum-pairs_loss": 0.02758924476802349,
"eval_xsum-pairs_runtime": 2.767,
"eval_xsum-pairs_samples_per_second": 46.26,
"eval_xsum-pairs_steps_per_second": 0.723,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_sciq_pairs_loss": 0.016450434923171997,
"eval_sciq_pairs_runtime": 2.8812,
"eval_sciq_pairs_samples_per_second": 44.426,
"eval_sciq_pairs_steps_per_second": 0.694,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_qasc_pairs_loss": 0.09214109182357788,
"eval_qasc_pairs_runtime": 0.6597,
"eval_qasc_pairs_samples_per_second": 194.029,
"eval_qasc_pairs_steps_per_second": 3.032,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_openbookqa_pairs_loss": 0.7429620623588562,
"eval_openbookqa_pairs_runtime": 0.5947,
"eval_openbookqa_pairs_samples_per_second": 215.22,
"eval_openbookqa_pairs_steps_per_second": 3.363,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_msmarco_pairs_loss": 0.17871831357479095,
"eval_msmarco_pairs_runtime": 1.5003,
"eval_msmarco_pairs_samples_per_second": 85.314,
"eval_msmarco_pairs_steps_per_second": 1.333,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_nq_pairs_loss": 0.09803248196840286,
"eval_nq_pairs_runtime": 2.3587,
"eval_nq_pairs_samples_per_second": 54.267,
"eval_nq_pairs_steps_per_second": 0.848,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_trivia_pairs_loss": 0.5323590636253357,
"eval_trivia_pairs_runtime": 3.6206,
"eval_trivia_pairs_samples_per_second": 35.354,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_gooaq_pairs_loss": 0.2667708098888397,
"eval_gooaq_pairs_runtime": 0.9171,
"eval_gooaq_pairs_samples_per_second": 139.573,
"eval_gooaq_pairs_steps_per_second": 2.181,
"step": 225
},
{
"epoch": 0.5056179775280899,
"eval_paws-pos_loss": 0.0236118845641613,
"eval_paws-pos_runtime": 0.6973,
"eval_paws-pos_samples_per_second": 183.563,
"eval_paws-pos_steps_per_second": 2.868,
"step": 225
},
{
"epoch": 0.5078651685393258,
"grad_norm": 3.5867371559143066,
"learning_rate": 3.4687178735995997e-05,
"loss": 0.347,
"step": 226
},
{
"epoch": 0.5101123595505618,
"grad_norm": 0.37994861602783203,
"learning_rate": 3.467406008093016e-05,
"loss": 0.0417,
"step": 227
},
{
"epoch": 0.5123595505617977,
"grad_norm": 4.081336975097656,
"learning_rate": 3.466067520840322e-05,
"loss": 0.7783,
"step": 228
},
{
"epoch": 0.5146067415730337,
"grad_norm": 4.306976795196533,
"learning_rate": 3.46470243790025e-05,
"loss": 0.9027,
"step": 229
},
{
"epoch": 0.5168539325842697,
"grad_norm": 4.0280022621154785,
"learning_rate": 3.4633107858493206e-05,
"loss": 0.7166,
"step": 230
},
{
"epoch": 0.5191011235955056,
"grad_norm": 3.4807679653167725,
"learning_rate": 3.461892591781319e-05,
"loss": 0.705,
"step": 231
},
{
"epoch": 0.5213483146067416,
"grad_norm": 4.166563510894775,
"learning_rate": 3.4604478833067756e-05,
"loss": 0.8425,
"step": 232
},
{
"epoch": 0.5235955056179775,
"grad_norm": 3.828537940979004,
"learning_rate": 3.4589766885524204e-05,
"loss": 0.5362,
"step": 233
},
{
"epoch": 0.5258426966292135,
"grad_norm": 4.316190242767334,
"learning_rate": 3.4574790361606435e-05,
"loss": 0.7869,
"step": 234
},
{
"epoch": 0.5280898876404494,
"grad_norm": 4.244805335998535,
"learning_rate": 3.4559549552889285e-05,
"loss": 0.88,
"step": 235
},
{
"epoch": 0.5303370786516854,
"grad_norm": 4.208700656890869,
"learning_rate": 3.454404475609294e-05,
"loss": 0.8077,
"step": 236
},
{
"epoch": 0.5325842696629214,
"grad_norm": 3.1473183631896973,
"learning_rate": 3.4528276273077094e-05,
"loss": 0.8145,
"step": 237
},
{
"epoch": 0.5348314606741573,
"grad_norm": 3.798297166824341,
"learning_rate": 3.4512244410835094e-05,
"loss": 0.78,
"step": 238
},
{
"epoch": 0.5370786516853933,
"grad_norm": 0.535529375076294,
"learning_rate": 3.449594948148796e-05,
"loss": 0.0536,
"step": 239
},
{
"epoch": 0.5393258426966292,
"grad_norm": 3.2119970321655273,
"learning_rate": 3.447939180227833e-05,
"loss": 0.7975,
"step": 240
},
{
"epoch": 0.5415730337078651,
"grad_norm": 4.725860118865967,
"learning_rate": 3.446257169556425e-05,
"loss": 0.8932,
"step": 241
},
{
"epoch": 0.5438202247191011,
"grad_norm": 3.867676258087158,
"learning_rate": 3.4445489488812906e-05,
"loss": 0.3386,
"step": 242
},
{
"epoch": 0.5460674157303371,
"grad_norm": 3.981114387512207,
"learning_rate": 3.4428145514594274e-05,
"loss": 0.7741,
"step": 243
},
{
"epoch": 0.5483146067415731,
"grad_norm": 4.034990310668945,
"learning_rate": 3.4410540110574616e-05,
"loss": 0.7439,
"step": 244
},
{
"epoch": 0.550561797752809,
"grad_norm": 4.209812641143799,
"learning_rate": 3.4392673619509916e-05,
"loss": 0.7999,
"step": 245
},
{
"epoch": 0.5528089887640449,
"grad_norm": 3.942631244659424,
"learning_rate": 3.437454638923921e-05,
"loss": 0.8542,
"step": 246
},
{
"epoch": 0.5550561797752809,
"grad_norm": 4.087955951690674,
"learning_rate": 3.435615877267783e-05,
"loss": 0.6992,
"step": 247
},
{
"epoch": 0.5573033707865168,
"grad_norm": 3.885822057723999,
"learning_rate": 3.4337511127810466e-05,
"loss": 0.8579,
"step": 248
},
{
"epoch": 0.5595505617977528,
"grad_norm": 5.198770523071289,
"learning_rate": 3.431860381768431e-05,
"loss": 1.0221,
"step": 249
},
{
"epoch": 0.5617977528089888,
"grad_norm": 4.321418285369873,
"learning_rate": 3.4299437210401866e-05,
"loss": 0.699,
"step": 250
},
{
"epoch": 0.5640449438202247,
"grad_norm": 3.1992154121398926,
"learning_rate": 3.4280011679113884e-05,
"loss": 0.8523,
"step": 251
},
{
"epoch": 0.5662921348314607,
"grad_norm": 4.94226598739624,
"learning_rate": 3.4260327602012027e-05,
"loss": 1.0307,
"step": 252
},
{
"epoch": 0.5685393258426966,
"grad_norm": 3.958935499191284,
"learning_rate": 3.424038536232154e-05,
"loss": 0.846,
"step": 253
},
{
"epoch": 0.5707865168539326,
"grad_norm": 4.023487091064453,
"learning_rate": 3.4220185348293775e-05,
"loss": 0.8361,
"step": 254
},
{
"epoch": 0.5730337078651685,
"grad_norm": 3.275102138519287,
"learning_rate": 3.4199727953198665e-05,
"loss": 0.8224,
"step": 255
},
{
"epoch": 0.5752808988764045,
"grad_norm": 3.6130261421203613,
"learning_rate": 3.417901357531701e-05,
"loss": 0.5301,
"step": 256
},
{
"epoch": 0.5775280898876405,
"grad_norm": 4.571770668029785,
"learning_rate": 3.415804261793277e-05,
"loss": 0.3795,
"step": 257
},
{
"epoch": 0.5797752808988764,
"grad_norm": 3.1884663105010986,
"learning_rate": 3.413681548932521e-05,
"loss": 0.5434,
"step": 258
},
{
"epoch": 0.5820224719101124,
"grad_norm": 4.795211315155029,
"learning_rate": 3.411533260276091e-05,
"loss": 0.847,
"step": 259
},
{
"epoch": 0.5842696629213483,
"grad_norm": 4.761318206787109,
"learning_rate": 3.409359437648579e-05,
"loss": 0.7323,
"step": 260
},
{
"epoch": 0.5865168539325842,
"grad_norm": 4.4683098793029785,
"learning_rate": 3.407160123371687e-05,
"loss": 0.6606,
"step": 261
},
{
"epoch": 0.5887640449438202,
"grad_norm": 0.7677178382873535,
"learning_rate": 3.404935360263415e-05,
"loss": 0.0543,
"step": 262
},
{
"epoch": 0.5910112359550562,
"grad_norm": 4.110381126403809,
"learning_rate": 3.4026851916372166e-05,
"loss": 0.6709,
"step": 263
},
{
"epoch": 0.5932584269662922,
"grad_norm": 4.766375541687012,
"learning_rate": 3.400409661301162e-05,
"loss": 0.809,
"step": 264
},
{
"epoch": 0.5955056179775281,
"grad_norm": 5.389264106750488,
"learning_rate": 3.398108813557082e-05,
"loss": 1.0391,
"step": 265
},
{
"epoch": 0.597752808988764,
"grad_norm": 3.8780810832977295,
"learning_rate": 3.3957826931997094e-05,
"loss": 0.7396,
"step": 266
},
{
"epoch": 0.6,
"grad_norm": 4.399974822998047,
"learning_rate": 3.393431345515801e-05,
"loss": 0.7839,
"step": 267
},
{
"epoch": 0.6022471910112359,
"grad_norm": 3.2098612785339355,
"learning_rate": 3.391054816283262e-05,
"loss": 0.3054,
"step": 268
},
{
"epoch": 0.604494382022472,
"grad_norm": 3.606182098388672,
"learning_rate": 3.3886531517702505e-05,
"loss": 0.5258,
"step": 269
},
{
"epoch": 0.6067415730337079,
"grad_norm": 4.3564934730529785,
"learning_rate": 3.3862263987342784e-05,
"loss": 0.7367,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_VitaminC_cosine_accuracy": 0.552734375,
"eval_VitaminC_cosine_accuracy_threshold": 0.814909815788269,
"eval_VitaminC_cosine_ap": 0.5506214433093293,
"eval_VitaminC_cosine_f1": 0.664886515353805,
"eval_VitaminC_cosine_f1_threshold": 0.3506072461605072,
"eval_VitaminC_cosine_precision": 0.5,
"eval_VitaminC_cosine_recall": 0.9920318725099602,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 316.90899658203125,
"eval_VitaminC_dot_ap": 0.5353657977329522,
"eval_VitaminC_dot_f1": 0.6666666666666667,
"eval_VitaminC_dot_f1_threshold": 155.67796325683594,
"eval_VitaminC_dot_precision": 0.506198347107438,
"eval_VitaminC_dot_recall": 0.9760956175298805,
"eval_VitaminC_euclidean_accuracy": 0.55078125,
"eval_VitaminC_euclidean_accuracy_threshold": 10.77621841430664,
"eval_VitaminC_euclidean_ap": 0.550546292530568,
"eval_VitaminC_euclidean_f1": 0.6666666666666666,
"eval_VitaminC_euclidean_f1_threshold": 24.22284698486328,
"eval_VitaminC_euclidean_precision": 0.5,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 335.6986389160156,
"eval_VitaminC_manhattan_ap": 0.5497325043939846,
"eval_VitaminC_manhattan_f1": 0.6640211640211641,
"eval_VitaminC_manhattan_f1_threshold": 513.494873046875,
"eval_VitaminC_manhattan_precision": 0.497029702970297,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.5546875,
"eval_VitaminC_max_accuracy_threshold": 335.6986389160156,
"eval_VitaminC_max_ap": 0.5506214433093293,
"eval_VitaminC_max_f1": 0.6666666666666667,
"eval_VitaminC_max_f1_threshold": 513.494873046875,
"eval_VitaminC_max_precision": 0.506198347107438,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5506214433093293,
"eval_sts-test_pearson_cosine": 0.8848372816940555,
"eval_sts-test_pearson_dot": 0.8774995772730847,
"eval_sts-test_pearson_euclidean": 0.9058906663416005,
"eval_sts-test_pearson_manhattan": 0.9066316554236529,
"eval_sts-test_pearson_max": 0.9066316554236529,
"eval_sts-test_spearman_cosine": 0.9085018016884417,
"eval_sts-test_spearman_dot": 0.8776881864036095,
"eval_sts-test_spearman_euclidean": 0.903223569412372,
"eval_sts-test_spearman_manhattan": 0.9037578547221237,
"eval_sts-test_spearman_max": 0.9085018016884417,
"eval_vitaminc-pairs_loss": 1.4935871362686157,
"eval_vitaminc-pairs_runtime": 1.8963,
"eval_vitaminc-pairs_samples_per_second": 56.952,
"eval_vitaminc-pairs_steps_per_second": 1.055,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_negation-triplets_loss": 0.9505463242530823,
"eval_negation-triplets_runtime": 0.3041,
"eval_negation-triplets_samples_per_second": 210.485,
"eval_negation-triplets_steps_per_second": 3.289,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_scitail-pairs-pos_loss": 0.09635873883962631,
"eval_scitail-pairs-pos_runtime": 0.4048,
"eval_scitail-pairs-pos_samples_per_second": 133.396,
"eval_scitail-pairs-pos_steps_per_second": 2.47,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_scitail-pairs-qa_loss": 0.0009468490607105196,
"eval_scitail-pairs-qa_runtime": 0.5341,
"eval_scitail-pairs-qa_samples_per_second": 239.65,
"eval_scitail-pairs-qa_steps_per_second": 3.745,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_xsum-pairs_loss": 0.026903513818979263,
"eval_xsum-pairs_runtime": 2.7518,
"eval_xsum-pairs_samples_per_second": 46.514,
"eval_xsum-pairs_steps_per_second": 0.727,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_sciq_pairs_loss": 0.01619444414973259,
"eval_sciq_pairs_runtime": 2.8856,
"eval_sciq_pairs_samples_per_second": 44.358,
"eval_sciq_pairs_steps_per_second": 0.693,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_qasc_pairs_loss": 0.09130185097455978,
"eval_qasc_pairs_runtime": 0.6645,
"eval_qasc_pairs_samples_per_second": 192.631,
"eval_qasc_pairs_steps_per_second": 3.01,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_openbookqa_pairs_loss": 0.7336423397064209,
"eval_openbookqa_pairs_runtime": 0.5935,
"eval_openbookqa_pairs_samples_per_second": 215.687,
"eval_openbookqa_pairs_steps_per_second": 3.37,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_msmarco_pairs_loss": 0.15868164598941803,
"eval_msmarco_pairs_runtime": 1.5086,
"eval_msmarco_pairs_samples_per_second": 84.844,
"eval_msmarco_pairs_steps_per_second": 1.326,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_nq_pairs_loss": 0.10780799388885498,
"eval_nq_pairs_runtime": 2.3746,
"eval_nq_pairs_samples_per_second": 53.905,
"eval_nq_pairs_steps_per_second": 0.842,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_trivia_pairs_loss": 0.49691149592399597,
"eval_trivia_pairs_runtime": 3.5992,
"eval_trivia_pairs_samples_per_second": 35.563,
"eval_trivia_pairs_steps_per_second": 0.556,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_gooaq_pairs_loss": 0.3025541603565216,
"eval_gooaq_pairs_runtime": 0.9181,
"eval_gooaq_pairs_samples_per_second": 139.423,
"eval_gooaq_pairs_steps_per_second": 2.178,
"step": 270
},
{
"epoch": 0.6067415730337079,
"eval_paws-pos_loss": 0.024440350010991096,
"eval_paws-pos_runtime": 0.7046,
"eval_paws-pos_samples_per_second": 181.67,
"eval_paws-pos_steps_per_second": 2.839,
"step": 270
},
{
"epoch": 0.6089887640449438,
"grad_norm": 3.21183705329895,
"learning_rate": 3.383774604421301e-05,
"loss": 0.747,
"step": 271
},
{
"epoch": 0.6112359550561798,
"grad_norm": 4.403411865234375,
"learning_rate": 3.3812978165647975e-05,
"loss": 0.7855,
"step": 272
},
{
"epoch": 0.6134831460674157,
"grad_norm": 0.46612274646759033,
"learning_rate": 3.3787960833848405e-05,
"loss": 0.0473,
"step": 273
},
{
"epoch": 0.6157303370786517,
"grad_norm": 3.30610990524292,
"learning_rate": 3.3762694535871584e-05,
"loss": 0.4378,
"step": 274
},
{
"epoch": 0.6179775280898876,
"grad_norm": 3.7408640384674072,
"learning_rate": 3.373717976362187e-05,
"loss": 0.8767,
"step": 275
},
{
"epoch": 0.6202247191011236,
"grad_norm": 5.345012187957764,
"learning_rate": 3.3711417013841105e-05,
"loss": 1.0345,
"step": 276
},
{
"epoch": 0.6224719101123596,
"grad_norm": 3.518765449523926,
"learning_rate": 3.368540678809897e-05,
"loss": 0.5182,
"step": 277
},
{
"epoch": 0.6247191011235955,
"grad_norm": 6.666887283325195,
"learning_rate": 3.3659149592783186e-05,
"loss": 2.5949,
"step": 278
},
{
"epoch": 0.6269662921348315,
"grad_norm": 3.197411298751831,
"learning_rate": 3.363264593908969e-05,
"loss": 0.833,
"step": 279
},
{
"epoch": 0.6292134831460674,
"grad_norm": 0.6012090444564819,
"learning_rate": 3.360589634301267e-05,
"loss": 0.0778,
"step": 280
},
{
"epoch": 0.6314606741573033,
"grad_norm": 4.5016188621521,
"learning_rate": 3.357890132533449e-05,
"loss": 0.8048,
"step": 281
},
{
"epoch": 0.6337078651685393,
"grad_norm": 3.865889072418213,
"learning_rate": 3.35516614116156e-05,
"loss": 0.7524,
"step": 282
},
{
"epoch": 0.6359550561797753,
"grad_norm": 3.2998361587524414,
"learning_rate": 3.3524177132184266e-05,
"loss": 0.3246,
"step": 283
},
{
"epoch": 0.6382022471910113,
"grad_norm": 0.6418587565422058,
"learning_rate": 3.349644902212628e-05,
"loss": 0.0728,
"step": 284
},
{
"epoch": 0.6404494382022472,
"grad_norm": 5.772351264953613,
"learning_rate": 3.34684776212745e-05,
"loss": 2.3619,
"step": 285
},
{
"epoch": 0.6426966292134831,
"grad_norm": 3.769488573074341,
"learning_rate": 3.3440263474198376e-05,
"loss": 0.7464,
"step": 286
},
{
"epoch": 0.6449438202247191,
"grad_norm": 4.559601783752441,
"learning_rate": 3.3411807130193325e-05,
"loss": 0.6691,
"step": 287
},
{
"epoch": 0.647191011235955,
"grad_norm": 0.45337462425231934,
"learning_rate": 3.338310914327005e-05,
"loss": 0.059,
"step": 288
},
{
"epoch": 0.6494382022471911,
"grad_norm": 4.7184553146362305,
"learning_rate": 3.3354170072143766e-05,
"loss": 0.7841,
"step": 289
},
{
"epoch": 0.651685393258427,
"grad_norm": 3.886216640472412,
"learning_rate": 3.332499048022328e-05,
"loss": 0.647,
"step": 290
},
{
"epoch": 0.6539325842696629,
"grad_norm": 4.497567176818848,
"learning_rate": 3.329557093560006e-05,
"loss": 0.8814,
"step": 291
},
{
"epoch": 0.6561797752808989,
"grad_norm": 3.995391368865967,
"learning_rate": 3.326591201103716e-05,
"loss": 0.7247,
"step": 292
},
{
"epoch": 0.6584269662921348,
"grad_norm": 0.4348815083503723,
"learning_rate": 3.323601428395809e-05,
"loss": 0.059,
"step": 293
},
{
"epoch": 0.6606741573033708,
"grad_norm": 3.6197896003723145,
"learning_rate": 3.320587833643554e-05,
"loss": 0.8317,
"step": 294
},
{
"epoch": 0.6629213483146067,
"grad_norm": 4.4088215827941895,
"learning_rate": 3.317550475518006e-05,
"loss": 0.8548,
"step": 295
},
{
"epoch": 0.6651685393258427,
"grad_norm": 4.541014194488525,
"learning_rate": 3.314489413152867e-05,
"loss": 0.9213,
"step": 296
},
{
"epoch": 0.6674157303370787,
"grad_norm": 3.067857265472412,
"learning_rate": 3.311404706143329e-05,
"loss": 0.6923,
"step": 297
},
{
"epoch": 0.6696629213483146,
"grad_norm": 4.037753582000732,
"learning_rate": 3.3082964145449174e-05,
"loss": 0.7777,
"step": 298
},
{
"epoch": 0.6719101123595506,
"grad_norm": 4.280182838439941,
"learning_rate": 3.305164598872322e-05,
"loss": 0.7496,
"step": 299
},
{
"epoch": 0.6741573033707865,
"grad_norm": 4.357325077056885,
"learning_rate": 3.302009320098218e-05,
"loss": 0.7636,
"step": 300
},
{
"epoch": 0.6764044943820224,
"grad_norm": 4.007940292358398,
"learning_rate": 3.2988306396520775e-05,
"loss": 0.6867,
"step": 301
},
{
"epoch": 0.6786516853932584,
"grad_norm": 0.8544747233390808,
"learning_rate": 3.295628619418977e-05,
"loss": 0.0506,
"step": 302
},
{
"epoch": 0.6808988764044944,
"grad_norm": 3.34498929977417,
"learning_rate": 3.292403321738387e-05,
"loss": 0.3346,
"step": 303
},
{
"epoch": 0.6831460674157304,
"grad_norm": 2.441420316696167,
"learning_rate": 3.289154809402967e-05,
"loss": 0.2485,
"step": 304
},
{
"epoch": 0.6853932584269663,
"grad_norm": 4.533839702606201,
"learning_rate": 3.285883145657334e-05,
"loss": 0.8508,
"step": 305
},
{
"epoch": 0.6876404494382022,
"grad_norm": 3.2033944129943848,
"learning_rate": 3.2825883941968346e-05,
"loss": 0.8464,
"step": 306
},
{
"epoch": 0.6898876404494382,
"grad_norm": 3.6305220127105713,
"learning_rate": 3.279270619166309e-05,
"loss": 0.3385,
"step": 307
},
{
"epoch": 0.6921348314606741,
"grad_norm": 4.438405990600586,
"learning_rate": 3.2759298851588336e-05,
"loss": 0.8837,
"step": 308
},
{
"epoch": 0.6943820224719102,
"grad_norm": 4.252586841583252,
"learning_rate": 3.272566257214474e-05,
"loss": 0.9019,
"step": 309
},
{
"epoch": 0.6966292134831461,
"grad_norm": 4.231752872467041,
"learning_rate": 3.2691798008190096e-05,
"loss": 0.6922,
"step": 310
},
{
"epoch": 0.698876404494382,
"grad_norm": 3.862682342529297,
"learning_rate": 3.265770581902662e-05,
"loss": 0.6348,
"step": 311
},
{
"epoch": 0.701123595505618,
"grad_norm": 3.783026933670044,
"learning_rate": 3.262338666838813e-05,
"loss": 0.7522,
"step": 312
},
{
"epoch": 0.7033707865168539,
"grad_norm": 4.141933917999268,
"learning_rate": 3.25888412244271e-05,
"loss": 0.7843,
"step": 313
},
{
"epoch": 0.7056179775280899,
"grad_norm": 0.7638006210327148,
"learning_rate": 3.2554070159701684e-05,
"loss": 0.0493,
"step": 314
},
{
"epoch": 0.7078651685393258,
"grad_norm": 3.7285079956054688,
"learning_rate": 3.2519074151162564e-05,
"loss": 0.357,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_VitaminC_cosine_accuracy": 0.556640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.8318675756454468,
"eval_VitaminC_cosine_ap": 0.553255462027648,
"eval_VitaminC_cosine_f1": 0.6666666666666666,
"eval_VitaminC_cosine_f1_threshold": 0.3080925941467285,
"eval_VitaminC_cosine_precision": 0.5,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 284.4936218261719,
"eval_VitaminC_dot_ap": 0.5335304755231123,
"eval_VitaminC_dot_f1": 0.6675531914893617,
"eval_VitaminC_dot_f1_threshold": 117.11366271972656,
"eval_VitaminC_dot_precision": 0.500998003992016,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 14.916669845581055,
"eval_VitaminC_euclidean_ap": 0.5560392780320775,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 23.758323669433594,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 303.769775390625,
"eval_VitaminC_manhattan_ap": 0.5575735035337728,
"eval_VitaminC_manhattan_f1": 0.6666666666666666,
"eval_VitaminC_manhattan_f1_threshold": 500.6726989746094,
"eval_VitaminC_manhattan_precision": 0.5,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.556640625,
"eval_VitaminC_max_accuracy_threshold": 303.769775390625,
"eval_VitaminC_max_ap": 0.5575735035337728,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 500.6726989746094,
"eval_VitaminC_max_precision": 0.500998003992016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5575735035337728,
"eval_sts-test_pearson_cosine": 0.884017793393225,
"eval_sts-test_pearson_dot": 0.8725802033594147,
"eval_sts-test_pearson_euclidean": 0.9065592531799239,
"eval_sts-test_pearson_manhattan": 0.9070236641674441,
"eval_sts-test_pearson_max": 0.9070236641674441,
"eval_sts-test_spearman_cosine": 0.9067846957888538,
"eval_sts-test_spearman_dot": 0.8716365180769119,
"eval_sts-test_spearman_euclidean": 0.9026938039800204,
"eval_sts-test_spearman_manhattan": 0.903306941012344,
"eval_sts-test_spearman_max": 0.9067846957888538,
"eval_vitaminc-pairs_loss": 1.4885247945785522,
"eval_vitaminc-pairs_runtime": 1.9137,
"eval_vitaminc-pairs_samples_per_second": 56.436,
"eval_vitaminc-pairs_steps_per_second": 1.045,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_negation-triplets_loss": 0.9597576856613159,
"eval_negation-triplets_runtime": 0.3023,
"eval_negation-triplets_samples_per_second": 211.742,
"eval_negation-triplets_steps_per_second": 3.308,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_scitail-pairs-pos_loss": 0.09951130300760269,
"eval_scitail-pairs-pos_runtime": 0.3896,
"eval_scitail-pairs-pos_samples_per_second": 138.608,
"eval_scitail-pairs-pos_steps_per_second": 2.567,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_scitail-pairs-qa_loss": 0.0010157548822462559,
"eval_scitail-pairs-qa_runtime": 0.5373,
"eval_scitail-pairs-qa_samples_per_second": 238.245,
"eval_scitail-pairs-qa_steps_per_second": 3.723,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_xsum-pairs_loss": 0.027823584154248238,
"eval_xsum-pairs_runtime": 2.7408,
"eval_xsum-pairs_samples_per_second": 46.701,
"eval_xsum-pairs_steps_per_second": 0.73,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_sciq_pairs_loss": 0.015241424553096294,
"eval_sciq_pairs_runtime": 2.8458,
"eval_sciq_pairs_samples_per_second": 44.978,
"eval_sciq_pairs_steps_per_second": 0.703,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_qasc_pairs_loss": 0.09173130989074707,
"eval_qasc_pairs_runtime": 0.6608,
"eval_qasc_pairs_samples_per_second": 193.694,
"eval_qasc_pairs_steps_per_second": 3.026,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_openbookqa_pairs_loss": 0.6921954154968262,
"eval_openbookqa_pairs_runtime": 0.5893,
"eval_openbookqa_pairs_samples_per_second": 217.196,
"eval_openbookqa_pairs_steps_per_second": 3.394,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_msmarco_pairs_loss": 0.15177518129348755,
"eval_msmarco_pairs_runtime": 1.494,
"eval_msmarco_pairs_samples_per_second": 85.673,
"eval_msmarco_pairs_steps_per_second": 1.339,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_nq_pairs_loss": 0.10136909037828445,
"eval_nq_pairs_runtime": 2.3524,
"eval_nq_pairs_samples_per_second": 54.413,
"eval_nq_pairs_steps_per_second": 0.85,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_trivia_pairs_loss": 0.5301617980003357,
"eval_trivia_pairs_runtime": 3.5809,
"eval_trivia_pairs_samples_per_second": 35.745,
"eval_trivia_pairs_steps_per_second": 0.559,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_gooaq_pairs_loss": 0.28424739837646484,
"eval_gooaq_pairs_runtime": 0.9167,
"eval_gooaq_pairs_samples_per_second": 139.635,
"eval_gooaq_pairs_steps_per_second": 2.182,
"step": 315
},
{
"epoch": 0.7078651685393258,
"eval_paws-pos_loss": 0.023981213569641113,
"eval_paws-pos_runtime": 0.6966,
"eval_paws-pos_samples_per_second": 183.744,
"eval_paws-pos_steps_per_second": 2.871,
"step": 315
},
{
"epoch": 0.7101123595505618,
"grad_norm": 3.6374969482421875,
"learning_rate": 3.248385388013984e-05,
"loss": 0.841,
"step": 316
},
{
"epoch": 0.7123595505617978,
"grad_norm": 4.251607418060303,
"learning_rate": 3.2448410032329716e-05,
"loss": 0.5849,
"step": 317
},
{
"epoch": 0.7146067415730337,
"grad_norm": 4.323038101196289,
"learning_rate": 3.241274329778117e-05,
"loss": 0.6818,
"step": 318
},
{
"epoch": 0.7168539325842697,
"grad_norm": 4.027289867401123,
"learning_rate": 3.237685437088251e-05,
"loss": 0.8269,
"step": 319
},
{
"epoch": 0.7191011235955056,
"grad_norm": 3.014479875564575,
"learning_rate": 3.234074395034787e-05,
"loss": 0.6979,
"step": 320
},
{
"epoch": 0.7213483146067415,
"grad_norm": 3.5980277061462402,
"learning_rate": 3.2304412739203595e-05,
"loss": 0.3218,
"step": 321
},
{
"epoch": 0.7235955056179775,
"grad_norm": 3.2924134731292725,
"learning_rate": 3.226786144477456e-05,
"loss": 0.8206,
"step": 322
},
{
"epoch": 0.7258426966292135,
"grad_norm": 2.524231195449829,
"learning_rate": 3.2231090778670385e-05,
"loss": 0.2106,
"step": 323
},
{
"epoch": 0.7280898876404495,
"grad_norm": 5.464061260223389,
"learning_rate": 3.2194101456771604e-05,
"loss": 1.0524,
"step": 324
},
{
"epoch": 0.7303370786516854,
"grad_norm": 3.4692578315734863,
"learning_rate": 3.215689419921572e-05,
"loss": 0.3774,
"step": 325
},
{
"epoch": 0.7325842696629213,
"grad_norm": 4.947183132171631,
"learning_rate": 3.211946973038315e-05,
"loss": 0.9098,
"step": 326
},
{
"epoch": 0.7348314606741573,
"grad_norm": 4.432866096496582,
"learning_rate": 3.208182877888319e-05,
"loss": 0.7988,
"step": 327
},
{
"epoch": 0.7370786516853932,
"grad_norm": 4.585951328277588,
"learning_rate": 3.204397207753978e-05,
"loss": 0.7916,
"step": 328
},
{
"epoch": 0.7393258426966293,
"grad_norm": 3.7288637161254883,
"learning_rate": 3.200590036337724e-05,
"loss": 0.6314,
"step": 329
},
{
"epoch": 0.7415730337078652,
"grad_norm": 3.840074300765991,
"learning_rate": 3.196761437760593e-05,
"loss": 0.8628,
"step": 330
},
{
"epoch": 0.7438202247191011,
"grad_norm": 0.6423048377037048,
"learning_rate": 3.192911486560784e-05,
"loss": 0.0688,
"step": 331
},
{
"epoch": 0.7460674157303371,
"grad_norm": 4.148509502410889,
"learning_rate": 3.1890402576922036e-05,
"loss": 0.7386,
"step": 332
},
{
"epoch": 0.748314606741573,
"grad_norm": 4.7345147132873535,
"learning_rate": 3.1851478265230103e-05,
"loss": 0.8458,
"step": 333
},
{
"epoch": 0.750561797752809,
"grad_norm": 0.695708155632019,
"learning_rate": 3.181234268834144e-05,
"loss": 0.0442,
"step": 334
},
{
"epoch": 0.7528089887640449,
"grad_norm": 3.434741735458374,
"learning_rate": 3.177299660817856e-05,
"loss": 0.317,
"step": 335
},
{
"epoch": 0.755056179775281,
"grad_norm": 3.306964874267578,
"learning_rate": 3.1733440790762176e-05,
"loss": 0.8087,
"step": 336
},
{
"epoch": 0.7573033707865169,
"grad_norm": 3.010828733444214,
"learning_rate": 3.169367600619637e-05,
"loss": 0.3398,
"step": 337
},
{
"epoch": 0.7595505617977528,
"grad_norm": 4.152151584625244,
"learning_rate": 3.1653703028653545e-05,
"loss": 0.699,
"step": 338
},
{
"epoch": 0.7617977528089888,
"grad_norm": 4.073326110839844,
"learning_rate": 3.161352263635937e-05,
"loss": 0.7901,
"step": 339
},
{
"epoch": 0.7640449438202247,
"grad_norm": 4.365633487701416,
"learning_rate": 3.157313561157764e-05,
"loss": 0.8072,
"step": 340
},
{
"epoch": 0.7662921348314606,
"grad_norm": 3.506556272506714,
"learning_rate": 3.153254274059501e-05,
"loss": 0.5939,
"step": 341
},
{
"epoch": 0.7685393258426966,
"grad_norm": 4.319092273712158,
"learning_rate": 3.149174481370575e-05,
"loss": 0.6933,
"step": 342
},
{
"epoch": 0.7707865168539326,
"grad_norm": 0.6184964179992676,
"learning_rate": 3.145074262519629e-05,
"loss": 0.0437,
"step": 343
},
{
"epoch": 0.7730337078651686,
"grad_norm": 4.866581916809082,
"learning_rate": 3.140953697332979e-05,
"loss": 0.9882,
"step": 344
},
{
"epoch": 0.7752808988764045,
"grad_norm": 3.9585559368133545,
"learning_rate": 3.136812866033063e-05,
"loss": 0.3707,
"step": 345
},
{
"epoch": 0.7775280898876404,
"grad_norm": 4.253391265869141,
"learning_rate": 3.132651849236871e-05,
"loss": 0.7103,
"step": 346
},
{
"epoch": 0.7797752808988764,
"grad_norm": 0.5847011208534241,
"learning_rate": 3.128470727954383e-05,
"loss": 0.0372,
"step": 347
},
{
"epoch": 0.7820224719101123,
"grad_norm": 0.5127836465835571,
"learning_rate": 3.124269583586989e-05,
"loss": 0.028,
"step": 348
},
{
"epoch": 0.7842696629213484,
"grad_norm": 4.145182132720947,
"learning_rate": 3.120048497925904e-05,
"loss": 0.7676,
"step": 349
},
{
"epoch": 0.7865168539325843,
"grad_norm": 4.833105087280273,
"learning_rate": 3.1158075531505755e-05,
"loss": 0.6754,
"step": 350
},
{
"epoch": 0.7887640449438202,
"grad_norm": 0.49345946311950684,
"learning_rate": 3.1115468318270844e-05,
"loss": 0.0439,
"step": 351
},
{
"epoch": 0.7910112359550562,
"grad_norm": 3.357720375061035,
"learning_rate": 3.107266416906538e-05,
"loss": 0.8039,
"step": 352
},
{
"epoch": 0.7932584269662921,
"grad_norm": 0.2371903359889984,
"learning_rate": 3.1029663917234514e-05,
"loss": 0.0104,
"step": 353
},
{
"epoch": 0.7955056179775281,
"grad_norm": 0.48881796002388,
"learning_rate": 3.098646839994132e-05,
"loss": 0.0555,
"step": 354
},
{
"epoch": 0.797752808988764,
"grad_norm": 3.3021090030670166,
"learning_rate": 3.094307845815042e-05,
"loss": 0.8646,
"step": 355
},
{
"epoch": 0.8,
"grad_norm": 3.0412533283233643,
"learning_rate": 3.0899494936611663e-05,
"loss": 0.7781,
"step": 356
},
{
"epoch": 0.802247191011236,
"grad_norm": 0.30917835235595703,
"learning_rate": 3.085571868384366e-05,
"loss": 0.011,
"step": 357
},
{
"epoch": 0.8044943820224719,
"grad_norm": 3.6957950592041016,
"learning_rate": 3.081175055211726e-05,
"loss": 0.3267,
"step": 358
},
{
"epoch": 0.8067415730337079,
"grad_norm": 7.202300071716309,
"learning_rate": 3.0767591397438974e-05,
"loss": 2.5281,
"step": 359
},
{
"epoch": 0.8089887640449438,
"grad_norm": 2.9833834171295166,
"learning_rate": 3.072324207953429e-05,
"loss": 0.301,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.6793336868286133,
"eval_VitaminC_cosine_ap": 0.5555632752592039,
"eval_VitaminC_cosine_f1": 0.6657824933687002,
"eval_VitaminC_cosine_f1_threshold": 0.28029173612594604,
"eval_VitaminC_cosine_precision": 0.4990059642147117,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 265.5102844238281,
"eval_VitaminC_dot_ap": 0.5326105108889087,
"eval_VitaminC_dot_f1": 0.6675531914893617,
"eval_VitaminC_dot_f1_threshold": 106.37774658203125,
"eval_VitaminC_dot_precision": 0.500998003992016,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.55859375,
"eval_VitaminC_euclidean_accuracy_threshold": 15.296594619750977,
"eval_VitaminC_euclidean_ap": 0.5592294311948881,
"eval_VitaminC_euclidean_f1": 0.6657824933687002,
"eval_VitaminC_euclidean_f1_threshold": 23.58568572998047,
"eval_VitaminC_euclidean_precision": 0.4990059642147117,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.556640625,
"eval_VitaminC_manhattan_accuracy_threshold": 306.79913330078125,
"eval_VitaminC_manhattan_ap": 0.5598941655081213,
"eval_VitaminC_manhattan_f1": 0.6649006622516557,
"eval_VitaminC_manhattan_f1_threshold": 512.0101318359375,
"eval_VitaminC_manhattan_precision": 0.498015873015873,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 306.79913330078125,
"eval_VitaminC_max_ap": 0.5598941655081213,
"eval_VitaminC_max_f1": 0.6675531914893617,
"eval_VitaminC_max_f1_threshold": 512.0101318359375,
"eval_VitaminC_max_precision": 0.500998003992016,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5598941655081213,
"eval_sts-test_pearson_cosine": 0.8832151520369376,
"eval_sts-test_pearson_dot": 0.8763916954110884,
"eval_sts-test_pearson_euclidean": 0.9046869354209082,
"eval_sts-test_pearson_manhattan": 0.9047119917370259,
"eval_sts-test_pearson_max": 0.9047119917370259,
"eval_sts-test_spearman_cosine": 0.9054341922225841,
"eval_sts-test_spearman_dot": 0.8786041104705073,
"eval_sts-test_spearman_euclidean": 0.9002407635868509,
"eval_sts-test_spearman_manhattan": 0.9006719867416183,
"eval_sts-test_spearman_max": 0.9054341922225841,
"eval_vitaminc-pairs_loss": 1.4290639162063599,
"eval_vitaminc-pairs_runtime": 1.8905,
"eval_vitaminc-pairs_samples_per_second": 57.128,
"eval_vitaminc-pairs_steps_per_second": 1.058,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_negation-triplets_loss": 0.9030703902244568,
"eval_negation-triplets_runtime": 0.2986,
"eval_negation-triplets_samples_per_second": 214.299,
"eval_negation-triplets_steps_per_second": 3.348,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_scitail-pairs-pos_loss": 0.10728535801172256,
"eval_scitail-pairs-pos_runtime": 0.3831,
"eval_scitail-pairs-pos_samples_per_second": 140.965,
"eval_scitail-pairs-pos_steps_per_second": 2.61,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_scitail-pairs-qa_loss": 0.0005650219391100109,
"eval_scitail-pairs-qa_runtime": 0.5259,
"eval_scitail-pairs-qa_samples_per_second": 243.397,
"eval_scitail-pairs-qa_steps_per_second": 3.803,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_xsum-pairs_loss": 0.025990577414631844,
"eval_xsum-pairs_runtime": 2.734,
"eval_xsum-pairs_samples_per_second": 46.818,
"eval_xsum-pairs_steps_per_second": 0.732,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_sciq_pairs_loss": 0.016017427667975426,
"eval_sciq_pairs_runtime": 2.8252,
"eval_sciq_pairs_samples_per_second": 45.307,
"eval_sciq_pairs_steps_per_second": 0.708,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_qasc_pairs_loss": 0.10250324755907059,
"eval_qasc_pairs_runtime": 0.6511,
"eval_qasc_pairs_samples_per_second": 196.585,
"eval_qasc_pairs_steps_per_second": 3.072,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_openbookqa_pairs_loss": 0.6710968613624573,
"eval_openbookqa_pairs_runtime": 0.5776,
"eval_openbookqa_pairs_samples_per_second": 221.625,
"eval_openbookqa_pairs_steps_per_second": 3.463,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_msmarco_pairs_loss": 0.14522777497768402,
"eval_msmarco_pairs_runtime": 1.4981,
"eval_msmarco_pairs_samples_per_second": 85.441,
"eval_msmarco_pairs_steps_per_second": 1.335,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_nq_pairs_loss": 0.10225611180067062,
"eval_nq_pairs_runtime": 2.3595,
"eval_nq_pairs_samples_per_second": 54.248,
"eval_nq_pairs_steps_per_second": 0.848,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_trivia_pairs_loss": 0.5312957167625427,
"eval_trivia_pairs_runtime": 3.5813,
"eval_trivia_pairs_samples_per_second": 35.741,
"eval_trivia_pairs_steps_per_second": 0.558,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_gooaq_pairs_loss": 0.27713337540626526,
"eval_gooaq_pairs_runtime": 0.9166,
"eval_gooaq_pairs_samples_per_second": 139.645,
"eval_gooaq_pairs_steps_per_second": 2.182,
"step": 360
},
{
"epoch": 0.8089887640449438,
"eval_paws-pos_loss": 0.024326296523213387,
"eval_paws-pos_runtime": 0.6893,
"eval_paws-pos_samples_per_second": 185.682,
"eval_paws-pos_steps_per_second": 2.901,
"step": 360
},
{
"epoch": 0.8112359550561797,
"grad_norm": 4.372533798217773,
"learning_rate": 3.067870346183096e-05,
"loss": 0.7533,
"step": 361
},
{
"epoch": 0.8134831460674158,
"grad_norm": 2.6585452556610107,
"learning_rate": 3.063397641144216e-05,
"loss": 0.2958,
"step": 362
},
{
"epoch": 0.8157303370786517,
"grad_norm": 4.378647327423096,
"learning_rate": 3.058906179914962e-05,
"loss": 0.8296,
"step": 363
},
{
"epoch": 0.8179775280898877,
"grad_norm": 3.1601309776306152,
"learning_rate": 3.0543960499386694e-05,
"loss": 0.3191,
"step": 364
},
{
"epoch": 0.8202247191011236,
"grad_norm": 3.446498394012451,
"learning_rate": 3.049867339022129e-05,
"loss": 0.7866,
"step": 365
},
{
"epoch": 0.8224719101123595,
"grad_norm": 3.0058486461639404,
"learning_rate": 3.0453201353338826e-05,
"loss": 0.3157,
"step": 366
},
{
"epoch": 0.8247191011235955,
"grad_norm": 4.380611419677734,
"learning_rate": 3.040754527402502e-05,
"loss": 0.7402,
"step": 367
},
{
"epoch": 0.8269662921348314,
"grad_norm": 3.8081209659576416,
"learning_rate": 3.036170604114869e-05,
"loss": 0.4957,
"step": 368
},
{
"epoch": 0.8292134831460675,
"grad_norm": 4.2056989669799805,
"learning_rate": 3.031568454714442e-05,
"loss": 0.8505,
"step": 369
},
{
"epoch": 0.8314606741573034,
"grad_norm": 3.101804733276367,
"learning_rate": 3.0269481687995207e-05,
"loss": 0.7702,
"step": 370
},
{
"epoch": 0.8337078651685393,
"grad_norm": 4.0704345703125,
"learning_rate": 3.0223098363215002e-05,
"loss": 0.7591,
"step": 371
},
{
"epoch": 0.8359550561797753,
"grad_norm": 2.9631364345550537,
"learning_rate": 3.0176535475831208e-05,
"loss": 0.727,
"step": 372
},
{
"epoch": 0.8382022471910112,
"grad_norm": 3.3760929107666016,
"learning_rate": 3.01297939323671e-05,
"loss": 0.3233,
"step": 373
},
{
"epoch": 0.8404494382022472,
"grad_norm": 4.116260051727295,
"learning_rate": 3.0082874642824164e-05,
"loss": 0.8738,
"step": 374
},
{
"epoch": 0.8426966292134831,
"grad_norm": 0.40298929810523987,
"learning_rate": 3.0035778520664388e-05,
"loss": 0.0393,
"step": 375
},
{
"epoch": 0.8449438202247191,
"grad_norm": 3.0647614002227783,
"learning_rate": 2.9988506482792485e-05,
"loss": 0.7454,
"step": 376
},
{
"epoch": 0.8471910112359551,
"grad_norm": 2.951953649520874,
"learning_rate": 2.994105944953803e-05,
"loss": 0.8297,
"step": 377
},
{
"epoch": 0.849438202247191,
"grad_norm": 4.049951553344727,
"learning_rate": 2.9893438344637538e-05,
"loss": 0.7802,
"step": 378
},
{
"epoch": 0.851685393258427,
"grad_norm": 3.7383949756622314,
"learning_rate": 2.984564409521651e-05,
"loss": 0.6229,
"step": 379
},
{
"epoch": 0.8539325842696629,
"grad_norm": 0.0,
"learning_rate": 2.979767763177134e-05,
"loss": 0.0,
"step": 380
},
{
"epoch": 0.8561797752808988,
"grad_norm": 3.399641513824463,
"learning_rate": 2.9749539888151244e-05,
"loss": 0.3506,
"step": 381
},
{
"epoch": 0.8584269662921349,
"grad_norm": 0.48723292350769043,
"learning_rate": 2.9701231801540032e-05,
"loss": 0.041,
"step": 382
},
{
"epoch": 0.8606741573033708,
"grad_norm": 3.1171765327453613,
"learning_rate": 2.9652754312437897e-05,
"loss": 0.725,
"step": 383
},
{
"epoch": 0.8629213483146068,
"grad_norm": 2.6491808891296387,
"learning_rate": 2.9604108364643112e-05,
"loss": 0.257,
"step": 384
},
{
"epoch": 0.8651685393258427,
"grad_norm": 4.025605201721191,
"learning_rate": 2.9555294905233606e-05,
"loss": 0.7912,
"step": 385
},
{
"epoch": 0.8674157303370786,
"grad_norm": 4.142299652099609,
"learning_rate": 2.9506314884548583e-05,
"loss": 0.8915,
"step": 386
},
{
"epoch": 0.8696629213483146,
"grad_norm": 2.943582534790039,
"learning_rate": 2.945716925616998e-05,
"loss": 0.779,
"step": 387
},
{
"epoch": 0.8719101123595505,
"grad_norm": 4.478114604949951,
"learning_rate": 2.9407858976903913e-05,
"loss": 0.7828,
"step": 388
},
{
"epoch": 0.8741573033707866,
"grad_norm": 3.9878995418548584,
"learning_rate": 2.935838500676207e-05,
"loss": 0.7462,
"step": 389
},
{
"epoch": 0.8764044943820225,
"grad_norm": 3.7733311653137207,
"learning_rate": 2.9308748308942983e-05,
"loss": 0.7913,
"step": 390
},
{
"epoch": 0.8786516853932584,
"grad_norm": 3.179732322692871,
"learning_rate": 2.9258949849813315e-05,
"loss": 0.3209,
"step": 391
},
{
"epoch": 0.8808988764044944,
"grad_norm": 3.6665351390838623,
"learning_rate": 2.9208990598889008e-05,
"loss": 0.5932,
"step": 392
},
{
"epoch": 0.8831460674157303,
"grad_norm": 0.545093834400177,
"learning_rate": 2.9158871528816442e-05,
"loss": 0.0613,
"step": 393
},
{
"epoch": 0.8853932584269663,
"grad_norm": 5.226474285125732,
"learning_rate": 2.9108593615353467e-05,
"loss": 0.8802,
"step": 394
},
{
"epoch": 0.8876404494382022,
"grad_norm": 3.691817283630371,
"learning_rate": 2.9058157837350437e-05,
"loss": 0.6116,
"step": 395
},
{
"epoch": 0.8898876404494382,
"grad_norm": 0.4754512906074524,
"learning_rate": 2.900756517673113e-05,
"loss": 0.0537,
"step": 396
},
{
"epoch": 0.8921348314606742,
"grad_norm": 2.874117374420166,
"learning_rate": 2.8956816618473647e-05,
"loss": 0.3006,
"step": 397
},
{
"epoch": 0.8943820224719101,
"grad_norm": 3.8957912921905518,
"learning_rate": 2.890591315059121e-05,
"loss": 0.7636,
"step": 398
},
{
"epoch": 0.8966292134831461,
"grad_norm": 3.7385432720184326,
"learning_rate": 2.8854855764112973e-05,
"loss": 0.612,
"step": 399
},
{
"epoch": 0.898876404494382,
"grad_norm": 3.7403082847595215,
"learning_rate": 2.880364545306468e-05,
"loss": 0.54,
"step": 400
},
{
"epoch": 0.9011235955056179,
"grad_norm": 2.7360849380493164,
"learning_rate": 2.8752283214449328e-05,
"loss": 0.2761,
"step": 401
},
{
"epoch": 0.903370786516854,
"grad_norm": 8.988025665283203,
"learning_rate": 2.8700770048227775e-05,
"loss": 1.2668,
"step": 402
},
{
"epoch": 0.9056179775280899,
"grad_norm": 3.411295175552368,
"learning_rate": 2.864910695729925e-05,
"loss": 0.8066,
"step": 403
},
{
"epoch": 0.9078651685393259,
"grad_norm": 0.3018481135368347,
"learning_rate": 2.8597294947481834e-05,
"loss": 0.0094,
"step": 404
},
{
"epoch": 0.9101123595505618,
"grad_norm": 4.116438388824463,
"learning_rate": 2.8545335027492885e-05,
"loss": 0.673,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_VitaminC_cosine_accuracy": 0.55859375,
"eval_VitaminC_cosine_accuracy_threshold": 0.7188639044761658,
"eval_VitaminC_cosine_ap": 0.5516905675485202,
"eval_VitaminC_cosine_f1": 0.6675712347354138,
"eval_VitaminC_cosine_f1_threshold": 0.42514583468437195,
"eval_VitaminC_cosine_precision": 0.5061728395061729,
"eval_VitaminC_cosine_recall": 0.9800796812749004,
"eval_VitaminC_dot_accuracy": 0.548828125,
"eval_VitaminC_dot_accuracy_threshold": 320.3775329589844,
"eval_VitaminC_dot_ap": 0.5343066680873013,
"eval_VitaminC_dot_f1": 0.6720867208672087,
"eval_VitaminC_dot_f1_threshold": 152.709716796875,
"eval_VitaminC_dot_precision": 0.5092402464065708,
"eval_VitaminC_dot_recall": 0.9880478087649402,
"eval_VitaminC_euclidean_accuracy": 0.556640625,
"eval_VitaminC_euclidean_accuracy_threshold": 15.12228775024414,
"eval_VitaminC_euclidean_ap": 0.5542894540784595,
"eval_VitaminC_euclidean_f1": 0.6640211640211641,
"eval_VitaminC_euclidean_f1_threshold": 24.3716983795166,
"eval_VitaminC_euclidean_precision": 0.497029702970297,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55859375,
"eval_VitaminC_manhattan_accuracy_threshold": 305.93597412109375,
"eval_VitaminC_manhattan_ap": 0.5533328154567183,
"eval_VitaminC_manhattan_f1": 0.6649006622516557,
"eval_VitaminC_manhattan_f1_threshold": 509.4247741699219,
"eval_VitaminC_manhattan_precision": 0.498015873015873,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 320.3775329589844,
"eval_VitaminC_max_ap": 0.5542894540784595,
"eval_VitaminC_max_f1": 0.6720867208672087,
"eval_VitaminC_max_f1_threshold": 509.4247741699219,
"eval_VitaminC_max_precision": 0.5092402464065708,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5542894540784595,
"eval_sts-test_pearson_cosine": 0.8820726638294588,
"eval_sts-test_pearson_dot": 0.8723940521896922,
"eval_sts-test_pearson_euclidean": 0.9038814103150634,
"eval_sts-test_pearson_manhattan": 0.904449390563823,
"eval_sts-test_pearson_max": 0.904449390563823,
"eval_sts-test_spearman_cosine": 0.9051641183600871,
"eval_sts-test_spearman_dot": 0.8721959088443044,
"eval_sts-test_spearman_euclidean": 0.8999642007914521,
"eval_sts-test_spearman_manhattan": 0.9005904051921018,
"eval_sts-test_spearman_max": 0.9051641183600871,
"eval_vitaminc-pairs_loss": 1.48486328125,
"eval_vitaminc-pairs_runtime": 1.8874,
"eval_vitaminc-pairs_samples_per_second": 57.222,
"eval_vitaminc-pairs_steps_per_second": 1.06,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_negation-triplets_loss": 0.9023827314376831,
"eval_negation-triplets_runtime": 0.302,
"eval_negation-triplets_samples_per_second": 211.927,
"eval_negation-triplets_steps_per_second": 3.311,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_scitail-pairs-pos_loss": 0.10495099425315857,
"eval_scitail-pairs-pos_runtime": 0.3856,
"eval_scitail-pairs-pos_samples_per_second": 140.031,
"eval_scitail-pairs-pos_steps_per_second": 2.593,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_scitail-pairs-qa_loss": 0.0008332311408594251,
"eval_scitail-pairs-qa_runtime": 0.5224,
"eval_scitail-pairs-qa_samples_per_second": 245.005,
"eval_scitail-pairs-qa_steps_per_second": 3.828,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_xsum-pairs_loss": 0.028531953692436218,
"eval_xsum-pairs_runtime": 2.7425,
"eval_xsum-pairs_samples_per_second": 46.672,
"eval_xsum-pairs_steps_per_second": 0.729,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_sciq_pairs_loss": 0.015175853855907917,
"eval_sciq_pairs_runtime": 2.8294,
"eval_sciq_pairs_samples_per_second": 45.239,
"eval_sciq_pairs_steps_per_second": 0.707,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_qasc_pairs_loss": 0.09416583180427551,
"eval_qasc_pairs_runtime": 0.6538,
"eval_qasc_pairs_samples_per_second": 195.781,
"eval_qasc_pairs_steps_per_second": 3.059,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_openbookqa_pairs_loss": 0.715216875076294,
"eval_openbookqa_pairs_runtime": 0.578,
"eval_openbookqa_pairs_samples_per_second": 221.449,
"eval_openbookqa_pairs_steps_per_second": 3.46,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_msmarco_pairs_loss": 0.1417744755744934,
"eval_msmarco_pairs_runtime": 1.4882,
"eval_msmarco_pairs_samples_per_second": 86.012,
"eval_msmarco_pairs_steps_per_second": 1.344,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_nq_pairs_loss": 0.10870223492383957,
"eval_nq_pairs_runtime": 2.3451,
"eval_nq_pairs_samples_per_second": 54.583,
"eval_nq_pairs_steps_per_second": 0.853,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_trivia_pairs_loss": 0.49194595217704773,
"eval_trivia_pairs_runtime": 3.5796,
"eval_trivia_pairs_samples_per_second": 35.759,
"eval_trivia_pairs_steps_per_second": 0.559,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_gooaq_pairs_loss": 0.2616226375102997,
"eval_gooaq_pairs_runtime": 0.9137,
"eval_gooaq_pairs_samples_per_second": 140.093,
"eval_gooaq_pairs_steps_per_second": 2.189,
"step": 405
},
{
"epoch": 0.9101123595505618,
"eval_paws-pos_loss": 0.02422034554183483,
"eval_paws-pos_runtime": 0.6895,
"eval_paws-pos_samples_per_second": 185.641,
"eval_paws-pos_steps_per_second": 2.901,
"step": 405
},
{
"epoch": 0.9123595505617977,
"grad_norm": 3.427104949951172,
"learning_rate": 2.8493228208929387e-05,
"loss": 0.5189,
"step": 406
},
{
"epoch": 0.9146067415730337,
"grad_norm": 4.941195487976074,
"learning_rate": 2.8440975506248268e-05,
"loss": 0.649,
"step": 407
},
{
"epoch": 0.9168539325842696,
"grad_norm": 2.7992403507232666,
"learning_rate": 2.8388577936746633e-05,
"loss": 0.2982,
"step": 408
},
{
"epoch": 0.9191011235955057,
"grad_norm": 3.8877484798431396,
"learning_rate": 2.833603652054199e-05,
"loss": 0.7511,
"step": 409
},
{
"epoch": 0.9213483146067416,
"grad_norm": 3.2458090782165527,
"learning_rate": 2.8283352280552348e-05,
"loss": 0.5164,
"step": 410
},
{
"epoch": 0.9235955056179775,
"grad_norm": 3.7385945320129395,
"learning_rate": 2.8230526242476332e-05,
"loss": 0.5924,
"step": 411
},
{
"epoch": 0.9258426966292135,
"grad_norm": 4.369627952575684,
"learning_rate": 2.8177559434773203e-05,
"loss": 0.8191,
"step": 412
},
{
"epoch": 0.9280898876404494,
"grad_norm": 2.95206356048584,
"learning_rate": 2.8124452888642838e-05,
"loss": 0.2311,
"step": 413
},
{
"epoch": 0.9303370786516854,
"grad_norm": 3.984375238418579,
"learning_rate": 2.8071207638005662e-05,
"loss": 0.7421,
"step": 414
},
{
"epoch": 0.9325842696629213,
"grad_norm": 3.0188541412353516,
"learning_rate": 2.801782471948248e-05,
"loss": 0.2936,
"step": 415
},
{
"epoch": 0.9348314606741573,
"grad_norm": 4.104308605194092,
"learning_rate": 2.7964305172374362e-05,
"loss": 0.737,
"step": 416
},
{
"epoch": 0.9370786516853933,
"grad_norm": 3.686523675918579,
"learning_rate": 2.791065003864235e-05,
"loss": 0.6539,
"step": 417
},
{
"epoch": 0.9393258426966292,
"grad_norm": 3.839590311050415,
"learning_rate": 2.785686036288719e-05,
"loss": 0.6855,
"step": 418
},
{
"epoch": 0.9415730337078652,
"grad_norm": 4.174718856811523,
"learning_rate": 2.780293719232902e-05,
"loss": 0.8134,
"step": 419
},
{
"epoch": 0.9438202247191011,
"grad_norm": 4.046380043029785,
"learning_rate": 2.7748881576786946e-05,
"loss": 0.6885,
"step": 420
},
{
"epoch": 0.946067415730337,
"grad_norm": 3.4202940464019775,
"learning_rate": 2.7694694568658613e-05,
"loss": 0.5581,
"step": 421
},
{
"epoch": 0.9483146067415731,
"grad_norm": 3.787081718444824,
"learning_rate": 2.764037722289973e-05,
"loss": 0.8029,
"step": 422
},
{
"epoch": 0.950561797752809,
"grad_norm": 3.870718240737915,
"learning_rate": 2.7585930597003524e-05,
"loss": 0.8126,
"step": 423
},
{
"epoch": 0.952808988764045,
"grad_norm": 3.1959424018859863,
"learning_rate": 2.753135575098015e-05,
"loss": 0.8425,
"step": 424
},
{
"epoch": 0.9550561797752809,
"grad_norm": 0.4186573922634125,
"learning_rate": 2.7476653747336047e-05,
"loss": 0.049,
"step": 425
},
{
"epoch": 0.9573033707865168,
"grad_norm": 4.299917697906494,
"learning_rate": 2.7421825651053265e-05,
"loss": 0.7849,
"step": 426
},
{
"epoch": 0.9595505617977528,
"grad_norm": 2.6435227394104004,
"learning_rate": 2.736687252956873e-05,
"loss": 0.068,
"step": 427
},
{
"epoch": 0.9617977528089887,
"grad_norm": 2.717653274536133,
"learning_rate": 2.7311795452753443e-05,
"loss": 0.2925,
"step": 428
},
{
"epoch": 0.9640449438202248,
"grad_norm": 3.6929807662963867,
"learning_rate": 2.7256595492891683e-05,
"loss": 0.777,
"step": 429
},
{
"epoch": 0.9662921348314607,
"grad_norm": 2.8760790824890137,
"learning_rate": 2.720127372466011e-05,
"loss": 0.7397,
"step": 430
},
{
"epoch": 0.9685393258426966,
"grad_norm": 0.03685740381479263,
"learning_rate": 2.714583122510683e-05,
"loss": 0.0007,
"step": 431
},
{
"epoch": 0.9707865168539326,
"grad_norm": 4.058692455291748,
"learning_rate": 2.709026907363047e-05,
"loss": 0.8535,
"step": 432
},
{
"epoch": 0.9730337078651685,
"grad_norm": 4.2914276123046875,
"learning_rate": 2.703458835195911e-05,
"loss": 0.7026,
"step": 433
},
{
"epoch": 0.9752808988764045,
"grad_norm": 3.735518217086792,
"learning_rate": 2.6978790144129262e-05,
"loss": 0.7557,
"step": 434
},
{
"epoch": 0.9775280898876404,
"grad_norm": 4.058504104614258,
"learning_rate": 2.6922875536464747e-05,
"loss": 0.7225,
"step": 435
},
{
"epoch": 0.9797752808988764,
"grad_norm": 0.0,
"learning_rate": 2.6866845617555555e-05,
"loss": 0.0,
"step": 436
},
{
"epoch": 0.9820224719101124,
"grad_norm": 5.648872375488281,
"learning_rate": 2.6810701478236642e-05,
"loss": 0.4131,
"step": 437
},
{
"epoch": 0.9842696629213483,
"grad_norm": 2.7032744884490967,
"learning_rate": 2.6754444211566702e-05,
"loss": 0.2824,
"step": 438
},
{
"epoch": 0.9865168539325843,
"grad_norm": 3.150801420211792,
"learning_rate": 2.6698074912806882e-05,
"loss": 0.3144,
"step": 439
},
{
"epoch": 0.9887640449438202,
"grad_norm": 2.3572490215301514,
"learning_rate": 2.6641594679399448e-05,
"loss": 0.0509,
"step": 440
},
{
"epoch": 0.9910112359550561,
"grad_norm": 3.2544448375701904,
"learning_rate": 2.6585004610946452e-05,
"loss": 0.7645,
"step": 441
},
{
"epoch": 0.9932584269662922,
"grad_norm": 4.310440540313721,
"learning_rate": 2.6528305809188273e-05,
"loss": 0.2787,
"step": 442
},
{
"epoch": 0.9955056179775281,
"grad_norm": 3.863487482070923,
"learning_rate": 2.6471499377982225e-05,
"loss": 0.64,
"step": 443
},
{
"epoch": 0.9977528089887641,
"grad_norm": 6.1020612716674805,
"learning_rate": 2.6414586423281017e-05,
"loss": 0.4045,
"step": 444
},
{
"epoch": 1.0,
"grad_norm": 3.1245224475860596,
"learning_rate": 2.6357568053111255e-05,
"loss": 0.7661,
"step": 445
}
],
"logging_steps": 1,
"max_steps": 890,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 89,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 640,
"trial_name": null,
"trial_params": null
}