|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3021978021978022, |
|
"eval_steps": 5, |
|
"global_step": 55, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005494505494505495, |
|
"grad_norm": 38.56446075439453, |
|
"learning_rate": 2.0718232044198892e-07, |
|
"loss": 8.8159, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01098901098901099, |
|
"grad_norm": 42.89330291748047, |
|
"learning_rate": 4.1436464088397783e-07, |
|
"loss": 9.1259, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.016483516483516484, |
|
"grad_norm": 40.88720703125, |
|
"learning_rate": 6.215469613259668e-07, |
|
"loss": 8.9017, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02197802197802198, |
|
"grad_norm": 43.001651763916016, |
|
"learning_rate": 8.287292817679557e-07, |
|
"loss": 9.1969, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.027472527472527472, |
|
"grad_norm": 47.374000549316406, |
|
"learning_rate": 1.0359116022099446e-06, |
|
"loss": 9.3716, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.027472527472527472, |
|
"eval_Qnli-dev_cosine_accuracy": 0.599609375, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9149316549301147, |
|
"eval_Qnli-dev_cosine_ap": 0.5535936772329058, |
|
"eval_Qnli-dev_cosine_f1": 0.6315789473684211, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.6663029193878174, |
|
"eval_Qnli-dev_cosine_precision": 0.4633663366336634, |
|
"eval_Qnli-dev_cosine_recall": 0.9915254237288136, |
|
"eval_Qnli-dev_dot_accuracy": 0.576171875, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 376.8692321777344, |
|
"eval_Qnli-dev_dot_ap": 0.49386849366879665, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 237.3916015625, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.603515625, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 8.217397689819336, |
|
"eval_Qnli-dev_euclidean_ap": 0.5622359472661989, |
|
"eval_Qnli-dev_euclidean_f1": 0.6307277628032345, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 17.456497192382812, |
|
"eval_Qnli-dev_euclidean_precision": 0.4624505928853755, |
|
"eval_Qnli-dev_euclidean_recall": 0.9915254237288136, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.6171875, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 170.3812255859375, |
|
"eval_Qnli-dev_manhattan_ap": 0.6067473143476283, |
|
"eval_Qnli-dev_manhattan_f1": 0.629878869448183, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 251.22586059570312, |
|
"eval_Qnli-dev_manhattan_precision": 0.46153846153846156, |
|
"eval_Qnli-dev_manhattan_recall": 0.9915254237288136, |
|
"eval_Qnli-dev_max_accuracy": 0.6171875, |
|
"eval_Qnli-dev_max_accuracy_threshold": 376.8692321777344, |
|
"eval_Qnli-dev_max_ap": 0.6067473143476283, |
|
"eval_Qnli-dev_max_f1": 0.6315789473684211, |
|
"eval_Qnli-dev_max_f1_threshold": 251.22586059570312, |
|
"eval_Qnli-dev_max_precision": 0.4633663366336634, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.6640625, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.984787106513977, |
|
"eval_allNLI-dev_cosine_ap": 0.34628735123984455, |
|
"eval_allNLI-dev_cosine_f1": 0.5105105105105106, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.7257537841796875, |
|
"eval_allNLI-dev_cosine_precision": 0.3448275862068966, |
|
"eval_allNLI-dev_cosine_recall": 0.9826589595375722, |
|
"eval_allNLI-dev_dot_accuracy": 0.66015625, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 510.50128173828125, |
|
"eval_allNLI-dev_dot_ap": 0.3331728171237445, |
|
"eval_allNLI-dev_dot_f1": 0.5073746312684366, |
|
"eval_allNLI-dev_dot_f1_threshold": 320.4217834472656, |
|
"eval_allNLI-dev_dot_precision": 0.3405940594059406, |
|
"eval_allNLI-dev_dot_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.6640625, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 3.554497241973877, |
|
"eval_allNLI-dev_euclidean_ap": 0.3510566487009116, |
|
"eval_allNLI-dev_euclidean_f1": 0.5120481927710844, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 16.369325637817383, |
|
"eval_allNLI-dev_euclidean_precision": 0.34623217922606925, |
|
"eval_allNLI-dev_euclidean_recall": 0.9826589595375722, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.6640625, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 59.6655387878418, |
|
"eval_allNLI-dev_manhattan_ap": 0.3706224646404015, |
|
"eval_allNLI-dev_manhattan_f1": 0.5096870342771982, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 247.54861450195312, |
|
"eval_allNLI-dev_manhattan_precision": 0.3433734939759036, |
|
"eval_allNLI-dev_manhattan_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_max_accuracy": 0.6640625, |
|
"eval_allNLI-dev_max_accuracy_threshold": 510.50128173828125, |
|
"eval_allNLI-dev_max_ap": 0.3706224646404015, |
|
"eval_allNLI-dev_max_f1": 0.5120481927710844, |
|
"eval_allNLI-dev_max_f1_threshold": 320.4217834472656, |
|
"eval_allNLI-dev_max_precision": 0.34623217922606925, |
|
"eval_allNLI-dev_max_recall": 0.9942196531791907, |
|
"eval_loss": 1.3745524883270264, |
|
"eval_runtime": 56.8233, |
|
"eval_samples_per_second": 26.503, |
|
"eval_sequential_score": 0.6067473143476283, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.1514570156735535, |
|
"eval_sts-test_pearson_dot": 0.28408663830954645, |
|
"eval_sts-test_pearson_euclidean": 0.14094815932702276, |
|
"eval_sts-test_pearson_manhattan": 0.18757962873571718, |
|
"eval_sts-test_pearson_max": 0.28408663830954645, |
|
"eval_sts-test_spearman_cosine": 0.19430208270682803, |
|
"eval_sts-test_spearman_dot": 0.29861509823099586, |
|
"eval_sts-test_spearman_euclidean": 0.16253371729283397, |
|
"eval_sts-test_spearman_manhattan": 0.20774542441268956, |
|
"eval_sts-test_spearman_max": 0.29861509823099586, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03296703296703297, |
|
"grad_norm": 41.89334487915039, |
|
"learning_rate": 1.2430939226519335e-06, |
|
"loss": 9.0425, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 38.501129150390625, |
|
"learning_rate": 1.4502762430939224e-06, |
|
"loss": 8.7309, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04395604395604396, |
|
"grad_norm": 42.019371032714844, |
|
"learning_rate": 1.6574585635359113e-06, |
|
"loss": 9.0123, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04945054945054945, |
|
"grad_norm": 41.45735168457031, |
|
"learning_rate": 1.8646408839779003e-06, |
|
"loss": 8.8095, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.054945054945054944, |
|
"grad_norm": 45.60405731201172, |
|
"learning_rate": 2.071823204419889e-06, |
|
"loss": 9.3194, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.054945054945054944, |
|
"eval_Qnli-dev_cosine_accuracy": 0.6015625, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.915002703666687, |
|
"eval_Qnli-dev_cosine_ap": 0.5561367291733588, |
|
"eval_Qnli-dev_cosine_f1": 0.6315789473684211, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.6884599328041077, |
|
"eval_Qnli-dev_cosine_precision": 0.4633663366336634, |
|
"eval_Qnli-dev_cosine_recall": 0.9915254237288136, |
|
"eval_Qnli-dev_dot_accuracy": 0.580078125, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 375.57147216796875, |
|
"eval_Qnli-dev_dot_ap": 0.49566240556276475, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 236.90142822265625, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.599609375, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 8.115190505981445, |
|
"eval_Qnli-dev_euclidean_ap": 0.5639171158048, |
|
"eval_Qnli-dev_euclidean_f1": 0.6307277628032345, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 16.781246185302734, |
|
"eval_Qnli-dev_euclidean_precision": 0.4624505928853755, |
|
"eval_Qnli-dev_euclidean_recall": 0.9915254237288136, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.6171875, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 169.50537109375, |
|
"eval_Qnli-dev_manhattan_ap": 0.608914651260932, |
|
"eval_Qnli-dev_manhattan_f1": 0.629878869448183, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 245.56036376953125, |
|
"eval_Qnli-dev_manhattan_precision": 0.46153846153846156, |
|
"eval_Qnli-dev_manhattan_recall": 0.9915254237288136, |
|
"eval_Qnli-dev_max_accuracy": 0.6171875, |
|
"eval_Qnli-dev_max_accuracy_threshold": 375.57147216796875, |
|
"eval_Qnli-dev_max_ap": 0.608914651260932, |
|
"eval_Qnli-dev_max_f1": 0.6315789473684211, |
|
"eval_Qnli-dev_max_f1_threshold": 245.56036376953125, |
|
"eval_Qnli-dev_max_precision": 0.4633663366336634, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.6640625, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.985245406627655, |
|
"eval_allNLI-dev_cosine_ap": 0.34847362780632896, |
|
"eval_allNLI-dev_cosine_f1": 0.5097451274362819, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.7295986413955688, |
|
"eval_allNLI-dev_cosine_precision": 0.3441295546558704, |
|
"eval_allNLI-dev_cosine_recall": 0.9826589595375722, |
|
"eval_allNLI-dev_dot_accuracy": 0.66015625, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 511.2210693359375, |
|
"eval_allNLI-dev_dot_ap": 0.3336754845077054, |
|
"eval_allNLI-dev_dot_f1": 0.5066273932253312, |
|
"eval_allNLI-dev_dot_f1_threshold": 324.83251953125, |
|
"eval_allNLI-dev_dot_precision": 0.33992094861660077, |
|
"eval_allNLI-dev_dot_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.666015625, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 4.795613765716553, |
|
"eval_allNLI-dev_euclidean_ap": 0.3551307012605588, |
|
"eval_allNLI-dev_euclidean_f1": 0.5121212121212121, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 15.640409469604492, |
|
"eval_allNLI-dev_euclidean_precision": 0.3470225872689938, |
|
"eval_allNLI-dev_euclidean_recall": 0.976878612716763, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.6640625, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 59.08678436279297, |
|
"eval_allNLI-dev_manhattan_ap": 0.37214209846872026, |
|
"eval_allNLI-dev_manhattan_f1": 0.5096870342771982, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 243.08975219726562, |
|
"eval_allNLI-dev_manhattan_precision": 0.3433734939759036, |
|
"eval_allNLI-dev_manhattan_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_max_accuracy": 0.666015625, |
|
"eval_allNLI-dev_max_accuracy_threshold": 511.2210693359375, |
|
"eval_allNLI-dev_max_ap": 0.37214209846872026, |
|
"eval_allNLI-dev_max_f1": 0.5121212121212121, |
|
"eval_allNLI-dev_max_f1_threshold": 324.83251953125, |
|
"eval_allNLI-dev_max_precision": 0.3470225872689938, |
|
"eval_allNLI-dev_max_recall": 0.9942196531791907, |
|
"eval_loss": 1.3227455615997314, |
|
"eval_runtime": 56.8227, |
|
"eval_samples_per_second": 26.503, |
|
"eval_sequential_score": 0.608914651260932, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.15490047433594056, |
|
"eval_sts-test_pearson_dot": 0.2911984188989889, |
|
"eval_sts-test_pearson_euclidean": 0.14360669882703436, |
|
"eval_sts-test_pearson_manhattan": 0.1892258838489897, |
|
"eval_sts-test_pearson_max": 0.2911984188989889, |
|
"eval_sts-test_spearman_cosine": 0.19757270192698087, |
|
"eval_sts-test_spearman_dot": 0.3042365279306765, |
|
"eval_sts-test_spearman_euclidean": 0.16524958184415522, |
|
"eval_sts-test_spearman_manhattan": 0.21029544078929435, |
|
"eval_sts-test_spearman_max": 0.3042365279306765, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06043956043956044, |
|
"grad_norm": 39.49834442138672, |
|
"learning_rate": 2.2790055248618783e-06, |
|
"loss": 8.9873, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06593406593406594, |
|
"grad_norm": 35.96696853637695, |
|
"learning_rate": 2.486187845303867e-06, |
|
"loss": 8.5575, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 37.535030364990234, |
|
"learning_rate": 2.693370165745856e-06, |
|
"loss": 8.8096, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"grad_norm": 27.989038467407227, |
|
"learning_rate": 2.900552486187845e-06, |
|
"loss": 8.0996, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08241758241758242, |
|
"grad_norm": 27.93619728088379, |
|
"learning_rate": 3.107734806629834e-06, |
|
"loss": 8.1942, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08241758241758242, |
|
"eval_Qnli-dev_cosine_accuracy": 0.59765625, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9154062867164612, |
|
"eval_Qnli-dev_cosine_ap": 0.5603803949927255, |
|
"eval_Qnli-dev_cosine_f1": 0.6315789473684211, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.7283656597137451, |
|
"eval_Qnli-dev_cosine_precision": 0.4633663366336634, |
|
"eval_Qnli-dev_cosine_recall": 0.9915254237288136, |
|
"eval_Qnli-dev_dot_accuracy": 0.58203125, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 388.41607666015625, |
|
"eval_Qnli-dev_dot_ap": 0.497501149468079, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 236.0553741455078, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.58984375, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 8.941183090209961, |
|
"eval_Qnli-dev_euclidean_ap": 0.5662645897099401, |
|
"eval_Qnli-dev_euclidean_f1": 0.6307277628032345, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 15.756710052490234, |
|
"eval_Qnli-dev_euclidean_precision": 0.4624505928853755, |
|
"eval_Qnli-dev_euclidean_recall": 0.9915254237288136, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.615234375, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 163.0137939453125, |
|
"eval_Qnli-dev_manhattan_ap": 0.6139853178845948, |
|
"eval_Qnli-dev_manhattan_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 285.00634765625, |
|
"eval_Qnli-dev_manhattan_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_manhattan_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_max_accuracy": 0.615234375, |
|
"eval_Qnli-dev_max_accuracy_threshold": 388.41607666015625, |
|
"eval_Qnli-dev_max_ap": 0.6139853178845948, |
|
"eval_Qnli-dev_max_f1": 0.6315789473684211, |
|
"eval_Qnli-dev_max_f1_threshold": 285.00634765625, |
|
"eval_Qnli-dev_max_precision": 0.4633663366336634, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.6640625, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9861629009246826, |
|
"eval_allNLI-dev_cosine_ap": 0.35265879982526854, |
|
"eval_allNLI-dev_cosine_f1": 0.5096296296296297, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.7308962941169739, |
|
"eval_allNLI-dev_cosine_precision": 0.3426294820717131, |
|
"eval_allNLI-dev_cosine_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_dot_accuracy": 0.662109375, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 498.35491943359375, |
|
"eval_allNLI-dev_dot_ap": 0.33242433107393726, |
|
"eval_allNLI-dev_dot_f1": 0.5058823529411766, |
|
"eval_allNLI-dev_dot_f1_threshold": 329.3536376953125, |
|
"eval_allNLI-dev_dot_precision": 0.33925049309664695, |
|
"eval_allNLI-dev_dot_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.666015625, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 4.617646217346191, |
|
"eval_allNLI-dev_euclidean_ap": 0.3596931915774687, |
|
"eval_allNLI-dev_euclidean_f1": 0.5104477611940298, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 15.696537971496582, |
|
"eval_allNLI-dev_euclidean_precision": 0.3440643863179074, |
|
"eval_allNLI-dev_euclidean_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.6640625, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 57.86843490600586, |
|
"eval_allNLI-dev_manhattan_ap": 0.37425821741092197, |
|
"eval_allNLI-dev_manhattan_f1": 0.5089285714285714, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 239.49026489257812, |
|
"eval_allNLI-dev_manhattan_precision": 0.342685370741483, |
|
"eval_allNLI-dev_manhattan_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_max_accuracy": 0.666015625, |
|
"eval_allNLI-dev_max_accuracy_threshold": 498.35491943359375, |
|
"eval_allNLI-dev_max_ap": 0.37425821741092197, |
|
"eval_allNLI-dev_max_f1": 0.5104477611940298, |
|
"eval_allNLI-dev_max_f1_threshold": 329.3536376953125, |
|
"eval_allNLI-dev_max_precision": 0.3440643863179074, |
|
"eval_allNLI-dev_max_recall": 0.9942196531791907, |
|
"eval_loss": 1.224420189857483, |
|
"eval_runtime": 56.895, |
|
"eval_samples_per_second": 26.47, |
|
"eval_sequential_score": 0.6139853178845948, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.16376006142638552, |
|
"eval_sts-test_pearson_dot": 0.30438012531511927, |
|
"eval_sts-test_pearson_euclidean": 0.1505780981176037, |
|
"eval_sts-test_pearson_manhattan": 0.19362843879381605, |
|
"eval_sts-test_pearson_max": 0.30438012531511927, |
|
"eval_sts-test_spearman_cosine": 0.20852655768027648, |
|
"eval_sts-test_spearman_dot": 0.3147068910558995, |
|
"eval_sts-test_spearman_euclidean": 0.17318335397119086, |
|
"eval_sts-test_spearman_manhattan": 0.21532141020490103, |
|
"eval_sts-test_spearman_max": 0.3147068910558995, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08791208791208792, |
|
"grad_norm": 28.57341766357422, |
|
"learning_rate": 3.3149171270718227e-06, |
|
"loss": 8.1654, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09340659340659341, |
|
"grad_norm": 21.84433364868164, |
|
"learning_rate": 3.522099447513812e-06, |
|
"loss": 7.7336, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0989010989010989, |
|
"grad_norm": 23.60162353515625, |
|
"learning_rate": 3.7292817679558005e-06, |
|
"loss": 7.9535, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.1043956043956044, |
|
"grad_norm": 22.785541534423828, |
|
"learning_rate": 3.936464088397789e-06, |
|
"loss": 7.9322, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 18.464128494262695, |
|
"learning_rate": 4.143646408839778e-06, |
|
"loss": 7.6812, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"eval_Qnli-dev_cosine_accuracy": 0.583984375, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9213418960571289, |
|
"eval_Qnli-dev_cosine_ap": 0.5636458261475978, |
|
"eval_Qnli-dev_cosine_f1": 0.6328767123287672, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.7996987700462341, |
|
"eval_Qnli-dev_cosine_precision": 0.4676113360323887, |
|
"eval_Qnli-dev_cosine_recall": 0.9788135593220338, |
|
"eval_Qnli-dev_dot_accuracy": 0.578125, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 391.3887939453125, |
|
"eval_Qnli-dev_dot_ap": 0.49946111380185365, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 237.4534149169922, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.59375, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 7.3581438064575195, |
|
"eval_Qnli-dev_euclidean_ap": 0.5724385046320207, |
|
"eval_Qnli-dev_euclidean_f1": 0.6321525885558583, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 13.227872848510742, |
|
"eval_Qnli-dev_euclidean_precision": 0.46586345381526106, |
|
"eval_Qnli-dev_euclidean_recall": 0.9830508474576272, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.62109375, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 162.46531677246094, |
|
"eval_Qnli-dev_manhattan_ap": 0.6199265668268106, |
|
"eval_Qnli-dev_manhattan_f1": 0.6332794830371568, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 181.62945556640625, |
|
"eval_Qnli-dev_manhattan_precision": 0.5117493472584856, |
|
"eval_Qnli-dev_manhattan_recall": 0.8305084745762712, |
|
"eval_Qnli-dev_max_accuracy": 0.62109375, |
|
"eval_Qnli-dev_max_accuracy_threshold": 391.3887939453125, |
|
"eval_Qnli-dev_max_ap": 0.6199265668268106, |
|
"eval_Qnli-dev_max_f1": 0.6332794830371568, |
|
"eval_Qnli-dev_max_f1_threshold": 237.4534149169922, |
|
"eval_Qnli-dev_max_precision": 0.5117493472584856, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.6640625, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.987220287322998, |
|
"eval_allNLI-dev_cosine_ap": 0.3616430753144169, |
|
"eval_allNLI-dev_cosine_f1": 0.5103857566765578, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.7597876787185669, |
|
"eval_allNLI-dev_cosine_precision": 0.34331337325349304, |
|
"eval_allNLI-dev_cosine_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_dot_accuracy": 0.662109375, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 499.94134521484375, |
|
"eval_allNLI-dev_dot_ap": 0.32968058746925677, |
|
"eval_allNLI-dev_dot_f1": 0.5065885797950219, |
|
"eval_allNLI-dev_dot_f1_threshold": 326.2508850097656, |
|
"eval_allNLI-dev_dot_precision": 0.3392156862745098, |
|
"eval_allNLI-dev_dot_recall": 1.0, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.66796875, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 4.4667768478393555, |
|
"eval_allNLI-dev_euclidean_ap": 0.36738456823550303, |
|
"eval_allNLI-dev_euclidean_f1": 0.5081723625557207, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 15.153694152832031, |
|
"eval_allNLI-dev_euclidean_precision": 0.342, |
|
"eval_allNLI-dev_euclidean_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.6640625, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 56.36178970336914, |
|
"eval_allNLI-dev_manhattan_ap": 0.37895381964253766, |
|
"eval_allNLI-dev_manhattan_f1": 0.5074183976261127, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 235.34033203125, |
|
"eval_allNLI-dev_manhattan_precision": 0.3413173652694611, |
|
"eval_allNLI-dev_manhattan_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_max_accuracy": 0.66796875, |
|
"eval_allNLI-dev_max_accuracy_threshold": 499.94134521484375, |
|
"eval_allNLI-dev_max_ap": 0.37895381964253766, |
|
"eval_allNLI-dev_max_f1": 0.5103857566765578, |
|
"eval_allNLI-dev_max_f1_threshold": 326.2508850097656, |
|
"eval_allNLI-dev_max_precision": 0.34331337325349304, |
|
"eval_allNLI-dev_max_recall": 1.0, |
|
"eval_loss": 1.1300753355026245, |
|
"eval_runtime": 56.883, |
|
"eval_samples_per_second": 26.475, |
|
"eval_sequential_score": 0.6199265668268106, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.1779758357593114, |
|
"eval_sts-test_pearson_dot": 0.3143294512111131, |
|
"eval_sts-test_pearson_euclidean": 0.16236578525254278, |
|
"eval_sts-test_pearson_manhattan": 0.20112989669839879, |
|
"eval_sts-test_pearson_max": 0.3143294512111131, |
|
"eval_sts-test_spearman_cosine": 0.22331353797679107, |
|
"eval_sts-test_spearman_dot": 0.32041258738078016, |
|
"eval_sts-test_spearman_euclidean": 0.18499862675500958, |
|
"eval_sts-test_spearman_manhattan": 0.22244513974949096, |
|
"eval_sts-test_spearman_max": 0.32041258738078016, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11538461538461539, |
|
"grad_norm": 16.04839324951172, |
|
"learning_rate": 4.3508287292817675e-06, |
|
"loss": 7.551, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.12087912087912088, |
|
"grad_norm": 13.244675636291504, |
|
"learning_rate": 4.558011049723757e-06, |
|
"loss": 7.3788, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.12637362637362637, |
|
"grad_norm": 10.887700080871582, |
|
"learning_rate": 4.765193370165746e-06, |
|
"loss": 7.1746, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.13186813186813187, |
|
"grad_norm": 11.019057273864746, |
|
"learning_rate": 4.972375690607734e-06, |
|
"loss": 7.1849, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.13736263736263737, |
|
"grad_norm": 10.919517517089844, |
|
"learning_rate": 5.179558011049724e-06, |
|
"loss": 7.1085, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.13736263736263737, |
|
"eval_Qnli-dev_cosine_accuracy": 0.58984375, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9257529973983765, |
|
"eval_Qnli-dev_cosine_ap": 0.5652038241773732, |
|
"eval_Qnli-dev_cosine_f1": 0.6346153846153846, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.8198822140693665, |
|
"eval_Qnli-dev_cosine_precision": 0.4695121951219512, |
|
"eval_Qnli-dev_cosine_recall": 0.9788135593220338, |
|
"eval_Qnli-dev_dot_accuracy": 0.57421875, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 398.296875, |
|
"eval_Qnli-dev_dot_ap": 0.5000550252228433, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 245.24951171875, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.587890625, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 7.207454681396484, |
|
"eval_Qnli-dev_euclidean_ap": 0.5720947339439485, |
|
"eval_Qnli-dev_euclidean_f1": 0.631424375917768, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 11.139533996582031, |
|
"eval_Qnli-dev_euclidean_precision": 0.48314606741573035, |
|
"eval_Qnli-dev_euclidean_recall": 0.9110169491525424, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.630859375, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 159.41851806640625, |
|
"eval_Qnli-dev_manhattan_ap": 0.6195419467022242, |
|
"eval_Qnli-dev_manhattan_f1": 0.6365007541478129, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 184.75155639648438, |
|
"eval_Qnli-dev_manhattan_precision": 0.49414519906323184, |
|
"eval_Qnli-dev_manhattan_recall": 0.8940677966101694, |
|
"eval_Qnli-dev_max_accuracy": 0.630859375, |
|
"eval_Qnli-dev_max_accuracy_threshold": 398.296875, |
|
"eval_Qnli-dev_max_ap": 0.6195419467022242, |
|
"eval_Qnli-dev_max_f1": 0.6365007541478129, |
|
"eval_Qnli-dev_max_f1_threshold": 245.24951171875, |
|
"eval_Qnli-dev_max_precision": 0.49414519906323184, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.666015625, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9775335788726807, |
|
"eval_allNLI-dev_cosine_ap": 0.37066068068308244, |
|
"eval_allNLI-dev_cosine_f1": 0.5066666666666666, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.7791957855224609, |
|
"eval_allNLI-dev_cosine_precision": 0.34063745019920316, |
|
"eval_allNLI-dev_cosine_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_dot_accuracy": 0.66015625, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 524.8079223632812, |
|
"eval_allNLI-dev_dot_ap": 0.3301995657806253, |
|
"eval_allNLI-dev_dot_f1": 0.5058479532163743, |
|
"eval_allNLI-dev_dot_f1_threshold": 326.4276428222656, |
|
"eval_allNLI-dev_dot_precision": 0.3385518590998043, |
|
"eval_allNLI-dev_dot_recall": 1.0, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.66796875, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 4.273993492126465, |
|
"eval_allNLI-dev_euclidean_ap": 0.3729474782349314, |
|
"eval_allNLI-dev_euclidean_f1": 0.5075075075075075, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 13.357471466064453, |
|
"eval_allNLI-dev_euclidean_precision": 0.34279918864097364, |
|
"eval_allNLI-dev_euclidean_recall": 0.976878612716763, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.666015625, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 87.52474975585938, |
|
"eval_allNLI-dev_manhattan_ap": 0.3851618671264259, |
|
"eval_allNLI-dev_manhattan_f1": 0.5066273932253312, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 237.66885375976562, |
|
"eval_allNLI-dev_manhattan_precision": 0.33992094861660077, |
|
"eval_allNLI-dev_manhattan_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_max_accuracy": 0.66796875, |
|
"eval_allNLI-dev_max_accuracy_threshold": 524.8079223632812, |
|
"eval_allNLI-dev_max_ap": 0.3851618671264259, |
|
"eval_allNLI-dev_max_f1": 0.5075075075075075, |
|
"eval_allNLI-dev_max_f1_threshold": 326.4276428222656, |
|
"eval_allNLI-dev_max_precision": 0.34279918864097364, |
|
"eval_allNLI-dev_max_recall": 1.0, |
|
"eval_loss": 1.0723015069961548, |
|
"eval_runtime": 56.965, |
|
"eval_samples_per_second": 26.437, |
|
"eval_sequential_score": 0.6195419467022242, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.19236546870487506, |
|
"eval_sts-test_pearson_dot": 0.31683240996339884, |
|
"eval_sts-test_pearson_euclidean": 0.17509975514192921, |
|
"eval_sts-test_pearson_manhattan": 0.2091062445542419, |
|
"eval_sts-test_pearson_max": 0.31683240996339884, |
|
"eval_sts-test_spearman_cosine": 0.23571321748312007, |
|
"eval_sts-test_spearman_dot": 0.3217659550277789, |
|
"eval_sts-test_spearman_euclidean": 0.1966071039599386, |
|
"eval_sts-test_spearman_manhattan": 0.23094926670295998, |
|
"eval_sts-test_spearman_max": 0.3217659550277789, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 11.845592498779297, |
|
"learning_rate": 5.386740331491712e-06, |
|
"loss": 7.3926, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.14835164835164835, |
|
"grad_norm": 11.545681953430176, |
|
"learning_rate": 5.593922651933701e-06, |
|
"loss": 7.1817, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 11.321039199829102, |
|
"learning_rate": 5.80110497237569e-06, |
|
"loss": 7.239, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.15934065934065933, |
|
"grad_norm": 9.933686256408691, |
|
"learning_rate": 6.00828729281768e-06, |
|
"loss": 7.0023, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.16483516483516483, |
|
"grad_norm": 10.041378021240234, |
|
"learning_rate": 6.215469613259668e-06, |
|
"loss": 6.9898, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16483516483516483, |
|
"eval_Qnli-dev_cosine_accuracy": 0.59375, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9396011233329773, |
|
"eval_Qnli-dev_cosine_ap": 0.57014985501852, |
|
"eval_Qnli-dev_cosine_f1": 0.6318681318681318, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.8371821045875549, |
|
"eval_Qnli-dev_cosine_precision": 0.46747967479674796, |
|
"eval_Qnli-dev_cosine_recall": 0.9745762711864406, |
|
"eval_Qnli-dev_dot_accuracy": 0.57421875, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 411.27325439453125, |
|
"eval_Qnli-dev_dot_ap": 0.49761647661900815, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 262.31964111328125, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.595703125, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 7.126171112060547, |
|
"eval_Qnli-dev_euclidean_ap": 0.5771748855905092, |
|
"eval_Qnli-dev_euclidean_f1": 0.6312925170068028, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 12.285415649414062, |
|
"eval_Qnli-dev_euclidean_precision": 0.4649298597194389, |
|
"eval_Qnli-dev_euclidean_recall": 0.9830508474576272, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.62890625, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 153.70269775390625, |
|
"eval_Qnli-dev_manhattan_ap": 0.621529978717656, |
|
"eval_Qnli-dev_manhattan_f1": 0.6396255850234009, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 176.74929809570312, |
|
"eval_Qnli-dev_manhattan_precision": 0.5061728395061729, |
|
"eval_Qnli-dev_manhattan_recall": 0.8686440677966102, |
|
"eval_Qnli-dev_max_accuracy": 0.62890625, |
|
"eval_Qnli-dev_max_accuracy_threshold": 411.27325439453125, |
|
"eval_Qnli-dev_max_ap": 0.621529978717656, |
|
"eval_Qnli-dev_max_f1": 0.6396255850234009, |
|
"eval_Qnli-dev_max_f1_threshold": 262.31964111328125, |
|
"eval_Qnli-dev_max_precision": 0.5061728395061729, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.66796875, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9714565277099609, |
|
"eval_allNLI-dev_cosine_ap": 0.3762859388623787, |
|
"eval_allNLI-dev_cosine_f1": 0.5060606060606061, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.8386883735656738, |
|
"eval_allNLI-dev_cosine_precision": 0.34291581108829566, |
|
"eval_allNLI-dev_cosine_recall": 0.9653179190751445, |
|
"eval_allNLI-dev_dot_accuracy": 0.66015625, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 532.74462890625, |
|
"eval_allNLI-dev_dot_ap": 0.3295831980167142, |
|
"eval_allNLI-dev_dot_f1": 0.5036603221083455, |
|
"eval_allNLI-dev_dot_f1_threshold": 337.565185546875, |
|
"eval_allNLI-dev_dot_precision": 0.33725490196078434, |
|
"eval_allNLI-dev_dot_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.66796875, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 4.119659423828125, |
|
"eval_allNLI-dev_euclidean_ap": 0.3787739041503637, |
|
"eval_allNLI-dev_euclidean_f1": 0.5098634294385432, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 12.496034622192383, |
|
"eval_allNLI-dev_euclidean_precision": 0.345679012345679, |
|
"eval_allNLI-dev_euclidean_recall": 0.9710982658959537, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.666015625, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 80.58773803710938, |
|
"eval_allNLI-dev_manhattan_ap": 0.3898279315596962, |
|
"eval_allNLI-dev_manhattan_f1": 0.5066273932253312, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 231.53036499023438, |
|
"eval_allNLI-dev_manhattan_precision": 0.33992094861660077, |
|
"eval_allNLI-dev_manhattan_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_max_accuracy": 0.66796875, |
|
"eval_allNLI-dev_max_accuracy_threshold": 532.74462890625, |
|
"eval_allNLI-dev_max_ap": 0.3898279315596962, |
|
"eval_allNLI-dev_max_f1": 0.5098634294385432, |
|
"eval_allNLI-dev_max_f1_threshold": 337.565185546875, |
|
"eval_allNLI-dev_max_precision": 0.345679012345679, |
|
"eval_allNLI-dev_max_recall": 0.9942196531791907, |
|
"eval_loss": 1.0282095670700073, |
|
"eval_runtime": 57.0236, |
|
"eval_samples_per_second": 26.41, |
|
"eval_sequential_score": 0.621529978717656, |
|
"eval_steps_per_second": 0.21, |
|
"eval_sts-test_pearson_cosine": 0.2061136669654613, |
|
"eval_sts-test_pearson_dot": 0.31343978856163146, |
|
"eval_sts-test_pearson_euclidean": 0.18904663819220255, |
|
"eval_sts-test_pearson_manhattan": 0.21785478120910598, |
|
"eval_sts-test_pearson_max": 0.31343978856163146, |
|
"eval_sts-test_spearman_cosine": 0.24768503017928584, |
|
"eval_sts-test_spearman_dot": 0.31740404957995916, |
|
"eval_sts-test_spearman_euclidean": 0.20965200715300875, |
|
"eval_sts-test_spearman_manhattan": 0.2394205014793063, |
|
"eval_sts-test_spearman_max": 0.31740404957995916, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17032967032967034, |
|
"grad_norm": 9.322327613830566, |
|
"learning_rate": 6.422651933701657e-06, |
|
"loss": 6.9776, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.17582417582417584, |
|
"grad_norm": 8.770292282104492, |
|
"learning_rate": 6.629834254143645e-06, |
|
"loss": 6.8088, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.1813186813186813, |
|
"grad_norm": 10.2041654586792, |
|
"learning_rate": 6.837016574585635e-06, |
|
"loss": 6.8916, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.18681318681318682, |
|
"grad_norm": 8.867596626281738, |
|
"learning_rate": 7.044198895027624e-06, |
|
"loss": 6.6931, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 9.029094696044922, |
|
"learning_rate": 7.251381215469613e-06, |
|
"loss": 6.5707, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"eval_Qnli-dev_cosine_accuracy": 0.609375, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9358766078948975, |
|
"eval_Qnli-dev_cosine_ap": 0.5800406926662295, |
|
"eval_Qnli-dev_cosine_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.6559731960296631, |
|
"eval_Qnli-dev_cosine_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_cosine_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_dot_accuracy": 0.572265625, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 415.3104248046875, |
|
"eval_Qnli-dev_dot_ap": 0.49721213365007333, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 280.5462951660156, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.59765625, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 7.095188140869141, |
|
"eval_Qnli-dev_euclidean_ap": 0.5853229647222131, |
|
"eval_Qnli-dev_euclidean_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 17.898162841796875, |
|
"eval_Qnli-dev_euclidean_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_euclidean_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.62890625, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 154.3598175048828, |
|
"eval_Qnli-dev_manhattan_ap": 0.6252613860599432, |
|
"eval_Qnli-dev_manhattan_f1": 0.6388888888888888, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 178.0916748046875, |
|
"eval_Qnli-dev_manhattan_precision": 0.5024271844660194, |
|
"eval_Qnli-dev_manhattan_recall": 0.8771186440677966, |
|
"eval_Qnli-dev_max_accuracy": 0.62890625, |
|
"eval_Qnli-dev_max_accuracy_threshold": 415.3104248046875, |
|
"eval_Qnli-dev_max_ap": 0.6252613860599432, |
|
"eval_Qnli-dev_max_f1": 0.6388888888888888, |
|
"eval_Qnli-dev_max_f1_threshold": 280.5462951660156, |
|
"eval_Qnli-dev_max_precision": 0.5024271844660194, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.673828125, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9733000993728638, |
|
"eval_allNLI-dev_cosine_ap": 0.383380605789286, |
|
"eval_allNLI-dev_cosine_f1": 0.5067466266866567, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.8367289900779724, |
|
"eval_allNLI-dev_cosine_precision": 0.34210526315789475, |
|
"eval_allNLI-dev_cosine_recall": 0.976878612716763, |
|
"eval_allNLI-dev_dot_accuracy": 0.66015625, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 538.5443115234375, |
|
"eval_allNLI-dev_dot_ap": 0.33316876894590236, |
|
"eval_allNLI-dev_dot_f1": 0.5037037037037037, |
|
"eval_allNLI-dev_dot_f1_threshold": 373.13201904296875, |
|
"eval_allNLI-dev_dot_precision": 0.3386454183266932, |
|
"eval_allNLI-dev_dot_recall": 0.9826589595375722, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.669921875, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 5.027232646942139, |
|
"eval_allNLI-dev_euclidean_ap": 0.38454616945711223, |
|
"eval_allNLI-dev_euclidean_f1": 0.5075987841945289, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 12.088457107543945, |
|
"eval_allNLI-dev_euclidean_precision": 0.3443298969072165, |
|
"eval_allNLI-dev_euclidean_recall": 0.9653179190751445, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.66796875, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 79.79684448242188, |
|
"eval_allNLI-dev_manhattan_ap": 0.39522701729473053, |
|
"eval_allNLI-dev_manhattan_f1": 0.5073746312684366, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 228.69581604003906, |
|
"eval_allNLI-dev_manhattan_precision": 0.3405940594059406, |
|
"eval_allNLI-dev_manhattan_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_max_accuracy": 0.673828125, |
|
"eval_allNLI-dev_max_accuracy_threshold": 538.5443115234375, |
|
"eval_allNLI-dev_max_ap": 0.39522701729473053, |
|
"eval_allNLI-dev_max_f1": 0.5075987841945289, |
|
"eval_allNLI-dev_max_f1_threshold": 373.13201904296875, |
|
"eval_allNLI-dev_max_precision": 0.3443298969072165, |
|
"eval_allNLI-dev_max_recall": 0.9942196531791907, |
|
"eval_loss": 0.9846486449241638, |
|
"eval_runtime": 56.9294, |
|
"eval_samples_per_second": 26.454, |
|
"eval_sequential_score": 0.6252613860599432, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.22023788463370408, |
|
"eval_sts-test_pearson_dot": 0.30789406379111994, |
|
"eval_sts-test_pearson_euclidean": 0.20441439700667358, |
|
"eval_sts-test_pearson_manhattan": 0.22863465206140024, |
|
"eval_sts-test_pearson_max": 0.30789406379111994, |
|
"eval_sts-test_spearman_cosine": 0.2607946884475894, |
|
"eval_sts-test_spearman_dot": 0.3140236287969756, |
|
"eval_sts-test_spearman_euclidean": 0.2252769995703387, |
|
"eval_sts-test_spearman_manhattan": 0.2514165246078849, |
|
"eval_sts-test_spearman_max": 0.3140236287969756, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.1978021978021978, |
|
"grad_norm": 8.10656452178955, |
|
"learning_rate": 7.458563535911601e-06, |
|
"loss": 6.6231, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.2032967032967033, |
|
"grad_norm": 8.79859733581543, |
|
"learning_rate": 7.665745856353591e-06, |
|
"loss": 6.4951, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.2087912087912088, |
|
"grad_norm": 8.37302303314209, |
|
"learning_rate": 7.872928176795578e-06, |
|
"loss": 6.4607, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.21428571428571427, |
|
"grad_norm": 9.559539794921875, |
|
"learning_rate": 8.08011049723757e-06, |
|
"loss": 6.4504, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 9.13284683227539, |
|
"learning_rate": 8.287292817679557e-06, |
|
"loss": 6.3649, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"eval_Qnli-dev_cosine_accuracy": 0.609375, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9314916133880615, |
|
"eval_Qnli-dev_cosine_ap": 0.585206574495468, |
|
"eval_Qnli-dev_cosine_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.6699934005737305, |
|
"eval_Qnli-dev_cosine_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_cosine_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_dot_accuracy": 0.568359375, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 410.2371826171875, |
|
"eval_Qnli-dev_dot_ap": 0.4994333901484545, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 284.4015808105469, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.607421875, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 7.864487171173096, |
|
"eval_Qnli-dev_euclidean_ap": 0.594681744506572, |
|
"eval_Qnli-dev_euclidean_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 17.5451717376709, |
|
"eval_Qnli-dev_euclidean_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_euclidean_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.626953125, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 160.8733367919922, |
|
"eval_Qnli-dev_manhattan_ap": 0.629870060597291, |
|
"eval_Qnli-dev_manhattan_f1": 0.6411149825783973, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 176.17674255371094, |
|
"eval_Qnli-dev_manhattan_precision": 0.5443786982248521, |
|
"eval_Qnli-dev_manhattan_recall": 0.7796610169491526, |
|
"eval_Qnli-dev_max_accuracy": 0.626953125, |
|
"eval_Qnli-dev_max_accuracy_threshold": 410.2371826171875, |
|
"eval_Qnli-dev_max_ap": 0.629870060597291, |
|
"eval_Qnli-dev_max_f1": 0.6411149825783973, |
|
"eval_Qnli-dev_max_f1_threshold": 284.4015808105469, |
|
"eval_Qnli-dev_max_precision": 0.5443786982248521, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.671875, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9736817479133606, |
|
"eval_allNLI-dev_cosine_ap": 0.3931696251670499, |
|
"eval_allNLI-dev_cosine_f1": 0.5082212257100149, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.8241816759109497, |
|
"eval_allNLI-dev_cosine_precision": 0.34274193548387094, |
|
"eval_allNLI-dev_cosine_recall": 0.9826589595375722, |
|
"eval_allNLI-dev_dot_accuracy": 0.66015625, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 535.3334350585938, |
|
"eval_allNLI-dev_dot_ap": 0.3396037265960903, |
|
"eval_allNLI-dev_dot_f1": 0.5065885797950219, |
|
"eval_allNLI-dev_dot_f1_threshold": 339.2867431640625, |
|
"eval_allNLI-dev_dot_precision": 0.3392156862745098, |
|
"eval_allNLI-dev_dot_recall": 1.0, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.671875, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 5.143446922302246, |
|
"eval_allNLI-dev_euclidean_ap": 0.39265337020239105, |
|
"eval_allNLI-dev_euclidean_f1": 0.5096870342771982, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 13.402521133422852, |
|
"eval_allNLI-dev_euclidean_precision": 0.3433734939759036, |
|
"eval_allNLI-dev_euclidean_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.669921875, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 84.37629699707031, |
|
"eval_allNLI-dev_manhattan_ap": 0.4041341581180743, |
|
"eval_allNLI-dev_manhattan_f1": 0.5081240768094535, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 237.8282012939453, |
|
"eval_allNLI-dev_manhattan_precision": 0.3412698412698413, |
|
"eval_allNLI-dev_manhattan_recall": 0.9942196531791907, |
|
"eval_allNLI-dev_max_accuracy": 0.671875, |
|
"eval_allNLI-dev_max_accuracy_threshold": 535.3334350585938, |
|
"eval_allNLI-dev_max_ap": 0.4041341581180743, |
|
"eval_allNLI-dev_max_f1": 0.5096870342771982, |
|
"eval_allNLI-dev_max_f1_threshold": 339.2867431640625, |
|
"eval_allNLI-dev_max_precision": 0.3433734939759036, |
|
"eval_allNLI-dev_max_recall": 1.0, |
|
"eval_loss": 0.9314436912536621, |
|
"eval_runtime": 56.8956, |
|
"eval_samples_per_second": 26.47, |
|
"eval_sequential_score": 0.629870060597291, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.2343040951892625, |
|
"eval_sts-test_pearson_dot": 0.30372753804498825, |
|
"eval_sts-test_pearson_euclidean": 0.21952614769670548, |
|
"eval_sts-test_pearson_manhattan": 0.24089043440705574, |
|
"eval_sts-test_pearson_max": 0.30372753804498825, |
|
"eval_sts-test_spearman_cosine": 0.2738163657359974, |
|
"eval_sts-test_spearman_dot": 0.3138135663760044, |
|
"eval_sts-test_spearman_euclidean": 0.2414264767711758, |
|
"eval_sts-test_spearman_manhattan": 0.26426890787444923, |
|
"eval_sts-test_spearman_max": 0.3138135663760044, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.22527472527472528, |
|
"grad_norm": 10.145929336547852, |
|
"learning_rate": 8.494475138121546e-06, |
|
"loss": 6.2244, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"grad_norm": 11.5704345703125, |
|
"learning_rate": 8.701657458563535e-06, |
|
"loss": 6.007, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.23626373626373626, |
|
"grad_norm": 12.0188627243042, |
|
"learning_rate": 8.908839779005524e-06, |
|
"loss": 5.977, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.24175824175824176, |
|
"grad_norm": 10.774896621704102, |
|
"learning_rate": 9.116022099447513e-06, |
|
"loss": 6.0748, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.24725274725274726, |
|
"grad_norm": 10.21664810180664, |
|
"learning_rate": 9.323204419889502e-06, |
|
"loss": 5.7946, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.24725274725274726, |
|
"eval_Qnli-dev_cosine_accuracy": 0.626953125, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9162209033966064, |
|
"eval_Qnli-dev_cosine_ap": 0.5960506092402249, |
|
"eval_Qnli-dev_cosine_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.6565881967544556, |
|
"eval_Qnli-dev_cosine_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_cosine_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_dot_accuracy": 0.580078125, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 380.0188293457031, |
|
"eval_Qnli-dev_dot_ap": 0.5101751614495642, |
|
"eval_Qnli-dev_dot_f1": 0.6291834002677376, |
|
"eval_Qnli-dev_dot_f1_threshold": 259.723876953125, |
|
"eval_Qnli-dev_dot_precision": 0.4598825831702544, |
|
"eval_Qnli-dev_dot_recall": 0.9957627118644068, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.6171875, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 8.577693939208984, |
|
"eval_Qnli-dev_euclidean_ap": 0.6056069399590038, |
|
"eval_Qnli-dev_euclidean_f1": 0.6297297297297297, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 13.569040298461914, |
|
"eval_Qnli-dev_euclidean_precision": 0.4623015873015873, |
|
"eval_Qnli-dev_euclidean_recall": 0.9872881355932204, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.634765625, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 177.60205078125, |
|
"eval_Qnli-dev_manhattan_ap": 0.6403997437330724, |
|
"eval_Qnli-dev_manhattan_f1": 0.6466165413533835, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 206.83108520507812, |
|
"eval_Qnli-dev_manhattan_precision": 0.5011655011655012, |
|
"eval_Qnli-dev_manhattan_recall": 0.9110169491525424, |
|
"eval_Qnli-dev_max_accuracy": 0.634765625, |
|
"eval_Qnli-dev_max_accuracy_threshold": 380.0188293457031, |
|
"eval_Qnli-dev_max_ap": 0.6403997437330724, |
|
"eval_Qnli-dev_max_f1": 0.6466165413533835, |
|
"eval_Qnli-dev_max_f1_threshold": 259.723876953125, |
|
"eval_Qnli-dev_max_precision": 0.5011655011655012, |
|
"eval_Qnli-dev_max_recall": 0.9957627118644068, |
|
"eval_allNLI-dev_cosine_accuracy": 0.669921875, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9787822961807251, |
|
"eval_allNLI-dev_cosine_ap": 0.40222842666723646, |
|
"eval_allNLI-dev_cosine_f1": 0.5096870342771982, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.7735908031463623, |
|
"eval_allNLI-dev_cosine_precision": 0.3433734939759036, |
|
"eval_allNLI-dev_cosine_recall": 0.9884393063583815, |
|
"eval_allNLI-dev_dot_accuracy": 0.662109375, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 506.13720703125, |
|
"eval_allNLI-dev_dot_ap": 0.3502405242734096, |
|
"eval_allNLI-dev_dot_f1": 0.5065885797950219, |
|
"eval_allNLI-dev_dot_f1_threshold": 313.13623046875, |
|
"eval_allNLI-dev_dot_precision": 0.3392156862745098, |
|
"eval_allNLI-dev_dot_recall": 1.0, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.669921875, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 4.530505180358887, |
|
"eval_allNLI-dev_euclidean_ap": 0.40012968794878784, |
|
"eval_allNLI-dev_euclidean_f1": 0.5105105105105106, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 13.752574920654297, |
|
"eval_allNLI-dev_euclidean_precision": 0.3448275862068966, |
|
"eval_allNLI-dev_euclidean_recall": 0.9826589595375722, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.669921875, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 84.84814453125, |
|
"eval_allNLI-dev_manhattan_ap": 0.411550187432712, |
|
"eval_allNLI-dev_manhattan_f1": 0.5152671755725191, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 184.89407348632812, |
|
"eval_allNLI-dev_manhattan_precision": 0.38461538461538464, |
|
"eval_allNLI-dev_manhattan_recall": 0.7803468208092486, |
|
"eval_allNLI-dev_max_accuracy": 0.669921875, |
|
"eval_allNLI-dev_max_accuracy_threshold": 506.13720703125, |
|
"eval_allNLI-dev_max_ap": 0.411550187432712, |
|
"eval_allNLI-dev_max_f1": 0.5152671755725191, |
|
"eval_allNLI-dev_max_f1_threshold": 313.13623046875, |
|
"eval_allNLI-dev_max_precision": 0.38461538461538464, |
|
"eval_allNLI-dev_max_recall": 1.0, |
|
"eval_loss": 0.8549203276634216, |
|
"eval_runtime": 56.9038, |
|
"eval_samples_per_second": 26.466, |
|
"eval_sequential_score": 0.6403997437330724, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.24573194235606088, |
|
"eval_sts-test_pearson_dot": 0.30146061611878155, |
|
"eval_sts-test_pearson_euclidean": 0.2321150570038752, |
|
"eval_sts-test_pearson_manhattan": 0.25210046027138755, |
|
"eval_sts-test_pearson_max": 0.30146061611878155, |
|
"eval_sts-test_spearman_cosine": 0.2847268127657434, |
|
"eval_sts-test_spearman_dot": 0.31326692971810843, |
|
"eval_sts-test_spearman_euclidean": 0.2512167997282951, |
|
"eval_sts-test_spearman_manhattan": 0.27196800438674845, |
|
"eval_sts-test_spearman_max": 0.31326692971810843, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.25274725274725274, |
|
"grad_norm": 9.914789199829102, |
|
"learning_rate": 9.530386740331491e-06, |
|
"loss": 5.8751, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.25824175824175827, |
|
"grad_norm": 10.506736755371094, |
|
"learning_rate": 9.73756906077348e-06, |
|
"loss": 5.543, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.26373626373626374, |
|
"grad_norm": 8.838153839111328, |
|
"learning_rate": 9.944751381215468e-06, |
|
"loss": 5.5511, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2692307692307692, |
|
"grad_norm": 9.893248558044434, |
|
"learning_rate": 1.0151933701657457e-05, |
|
"loss": 5.411, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.27472527472527475, |
|
"grad_norm": 9.514713287353516, |
|
"learning_rate": 1.0359116022099448e-05, |
|
"loss": 5.378, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.27472527472527475, |
|
"eval_Qnli-dev_cosine_accuracy": 0.6328125, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8978130221366882, |
|
"eval_Qnli-dev_cosine_ap": 0.6213005329057975, |
|
"eval_Qnli-dev_cosine_f1": 0.6384266263237518, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.8041039705276489, |
|
"eval_Qnli-dev_cosine_precision": 0.4964705882352941, |
|
"eval_Qnli-dev_cosine_recall": 0.8940677966101694, |
|
"eval_Qnli-dev_dot_accuracy": 0.58984375, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 356.9898681640625, |
|
"eval_Qnli-dev_dot_ap": 0.5268541601927336, |
|
"eval_Qnli-dev_dot_f1": 0.6318758815232722, |
|
"eval_Qnli-dev_dot_f1_threshold": 291.1335754394531, |
|
"eval_Qnli-dev_dot_precision": 0.47357293868921774, |
|
"eval_Qnli-dev_dot_recall": 0.9491525423728814, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.638671875, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 10.523520469665527, |
|
"eval_Qnli-dev_euclidean_ap": 0.6291519886215534, |
|
"eval_Qnli-dev_euclidean_f1": 0.6337448559670782, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 14.391512870788574, |
|
"eval_Qnli-dev_euclidean_precision": 0.4685598377281947, |
|
"eval_Qnli-dev_euclidean_recall": 0.9788135593220338, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.650390625, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 189.05128479003906, |
|
"eval_Qnli-dev_manhattan_ap": 0.6556822594753774, |
|
"eval_Qnli-dev_manhattan_f1": 0.6430769230769231, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 233.51510620117188, |
|
"eval_Qnli-dev_manhattan_precision": 0.5048309178743962, |
|
"eval_Qnli-dev_manhattan_recall": 0.885593220338983, |
|
"eval_Qnli-dev_max_accuracy": 0.650390625, |
|
"eval_Qnli-dev_max_accuracy_threshold": 356.9898681640625, |
|
"eval_Qnli-dev_max_ap": 0.6556822594753774, |
|
"eval_Qnli-dev_max_f1": 0.6430769230769231, |
|
"eval_Qnli-dev_max_f1_threshold": 291.1335754394531, |
|
"eval_Qnli-dev_max_precision": 0.5048309178743962, |
|
"eval_Qnli-dev_max_recall": 0.9788135593220338, |
|
"eval_allNLI-dev_cosine_accuracy": 0.66796875, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9755356907844543, |
|
"eval_allNLI-dev_cosine_ap": 0.4066962429964717, |
|
"eval_allNLI-dev_cosine_f1": 0.5179856115107913, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.8535807132720947, |
|
"eval_allNLI-dev_cosine_precision": 0.37597911227154046, |
|
"eval_allNLI-dev_cosine_recall": 0.8323699421965318, |
|
"eval_allNLI-dev_dot_accuracy": 0.666015625, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 476.64141845703125, |
|
"eval_allNLI-dev_dot_ap": 0.3666780883869565, |
|
"eval_allNLI-dev_dot_f1": 0.5111821086261982, |
|
"eval_allNLI-dev_dot_f1_threshold": 348.523193359375, |
|
"eval_allNLI-dev_dot_precision": 0.35320088300220753, |
|
"eval_allNLI-dev_dot_recall": 0.9248554913294798, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.669921875, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 4.862855911254883, |
|
"eval_allNLI-dev_euclidean_ap": 0.405432241017242, |
|
"eval_allNLI-dev_euclidean_f1": 0.5186567164179104, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 11.256525039672852, |
|
"eval_allNLI-dev_euclidean_precision": 0.38292011019283745, |
|
"eval_allNLI-dev_euclidean_recall": 0.8034682080924855, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.66796875, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 83.92739868164062, |
|
"eval_allNLI-dev_manhattan_ap": 0.41593074876648894, |
|
"eval_allNLI-dev_manhattan_f1": 0.5261194029850746, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 210.06704711914062, |
|
"eval_allNLI-dev_manhattan_precision": 0.3884297520661157, |
|
"eval_allNLI-dev_manhattan_recall": 0.815028901734104, |
|
"eval_allNLI-dev_max_accuracy": 0.669921875, |
|
"eval_allNLI-dev_max_accuracy_threshold": 476.64141845703125, |
|
"eval_allNLI-dev_max_ap": 0.41593074876648894, |
|
"eval_allNLI-dev_max_f1": 0.5261194029850746, |
|
"eval_allNLI-dev_max_f1_threshold": 348.523193359375, |
|
"eval_allNLI-dev_max_precision": 0.3884297520661157, |
|
"eval_allNLI-dev_max_recall": 0.9248554913294798, |
|
"eval_loss": 0.794283390045166, |
|
"eval_runtime": 56.8695, |
|
"eval_samples_per_second": 26.482, |
|
"eval_sequential_score": 0.6556822594753774, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.24584625215267258, |
|
"eval_sts-test_pearson_dot": 0.2940464136694855, |
|
"eval_sts-test_pearson_euclidean": 0.2382635480850375, |
|
"eval_sts-test_pearson_manhattan": 0.2536304588799789, |
|
"eval_sts-test_pearson_max": 0.2940464136694855, |
|
"eval_sts-test_spearman_cosine": 0.28663621483553026, |
|
"eval_sts-test_spearman_dot": 0.3088057212212612, |
|
"eval_sts-test_spearman_euclidean": 0.2572386189636446, |
|
"eval_sts-test_spearman_manhattan": 0.27229562837139487, |
|
"eval_sts-test_spearman_max": 0.3088057212212612, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2802197802197802, |
|
"grad_norm": 9.770188331604004, |
|
"learning_rate": 1.0566298342541435e-05, |
|
"loss": 5.3831, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 9.599061965942383, |
|
"learning_rate": 1.0773480662983425e-05, |
|
"loss": 4.9729, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.29120879120879123, |
|
"grad_norm": 10.340251922607422, |
|
"learning_rate": 1.0980662983425412e-05, |
|
"loss": 5.0425, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2967032967032967, |
|
"grad_norm": 9.890629768371582, |
|
"learning_rate": 1.1187845303867403e-05, |
|
"loss": 4.9446, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3021978021978022, |
|
"grad_norm": 10.401249885559082, |
|
"learning_rate": 1.1395027624309392e-05, |
|
"loss": 4.9288, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.3021978021978022, |
|
"eval_Qnli-dev_cosine_accuracy": 0.640625, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8695281744003296, |
|
"eval_Qnli-dev_cosine_ap": 0.6314640856589909, |
|
"eval_Qnli-dev_cosine_f1": 0.6578512396694215, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.7936367988586426, |
|
"eval_Qnli-dev_cosine_precision": 0.5392953929539296, |
|
"eval_Qnli-dev_cosine_recall": 0.8432203389830508, |
|
"eval_Qnli-dev_dot_accuracy": 0.609375, |
|
"eval_Qnli-dev_dot_accuracy_threshold": 351.17626953125, |
|
"eval_Qnli-dev_dot_ap": 0.5366456296706419, |
|
"eval_Qnli-dev_dot_f1": 0.6501650165016502, |
|
"eval_Qnli-dev_dot_f1_threshold": 316.48046875, |
|
"eval_Qnli-dev_dot_precision": 0.5324324324324324, |
|
"eval_Qnli-dev_dot_recall": 0.8347457627118644, |
|
"eval_Qnli-dev_euclidean_accuracy": 0.65234375, |
|
"eval_Qnli-dev_euclidean_accuracy_threshold": 10.764808654785156, |
|
"eval_Qnli-dev_euclidean_ap": 0.6460602994393339, |
|
"eval_Qnli-dev_euclidean_f1": 0.6393210749646393, |
|
"eval_Qnli-dev_euclidean_f1_threshold": 15.096710205078125, |
|
"eval_Qnli-dev_euclidean_precision": 0.47983014861995754, |
|
"eval_Qnli-dev_euclidean_recall": 0.9576271186440678, |
|
"eval_Qnli-dev_manhattan_accuracy": 0.658203125, |
|
"eval_Qnli-dev_manhattan_accuracy_threshold": 206.32894897460938, |
|
"eval_Qnli-dev_manhattan_ap": 0.6679289689394285, |
|
"eval_Qnli-dev_manhattan_f1": 0.652373660030628, |
|
"eval_Qnli-dev_manhattan_f1_threshold": 261.3590393066406, |
|
"eval_Qnli-dev_manhattan_precision": 0.5107913669064749, |
|
"eval_Qnli-dev_manhattan_recall": 0.902542372881356, |
|
"eval_Qnli-dev_max_accuracy": 0.658203125, |
|
"eval_Qnli-dev_max_accuracy_threshold": 351.17626953125, |
|
"eval_Qnli-dev_max_ap": 0.6679289689394285, |
|
"eval_Qnli-dev_max_f1": 0.6578512396694215, |
|
"eval_Qnli-dev_max_f1_threshold": 316.48046875, |
|
"eval_Qnli-dev_max_precision": 0.5392953929539296, |
|
"eval_Qnli-dev_max_recall": 0.9576271186440678, |
|
"eval_allNLI-dev_cosine_accuracy": 0.66796875, |
|
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9721465110778809, |
|
"eval_allNLI-dev_cosine_ap": 0.4140638596370657, |
|
"eval_allNLI-dev_cosine_f1": 0.5343511450381679, |
|
"eval_allNLI-dev_cosine_f1_threshold": 0.85741126537323, |
|
"eval_allNLI-dev_cosine_precision": 0.39886039886039887, |
|
"eval_allNLI-dev_cosine_recall": 0.8092485549132948, |
|
"eval_allNLI-dev_dot_accuracy": 0.666015625, |
|
"eval_allNLI-dev_dot_accuracy_threshold": 518.88671875, |
|
"eval_allNLI-dev_dot_ap": 0.3781233337023534, |
|
"eval_allNLI-dev_dot_f1": 0.514018691588785, |
|
"eval_allNLI-dev_dot_f1_threshold": 323.9651184082031, |
|
"eval_allNLI-dev_dot_precision": 0.35181236673773986, |
|
"eval_allNLI-dev_dot_recall": 0.953757225433526, |
|
"eval_allNLI-dev_euclidean_accuracy": 0.671875, |
|
"eval_allNLI-dev_euclidean_accuracy_threshold": 5.084325790405273, |
|
"eval_allNLI-dev_euclidean_ap": 0.41769294415599645, |
|
"eval_allNLI-dev_euclidean_f1": 0.5404339250493098, |
|
"eval_allNLI-dev_euclidean_f1_threshold": 11.333902359008789, |
|
"eval_allNLI-dev_euclidean_precision": 0.4101796407185629, |
|
"eval_allNLI-dev_euclidean_recall": 0.791907514450867, |
|
"eval_allNLI-dev_manhattan_accuracy": 0.671875, |
|
"eval_allNLI-dev_manhattan_accuracy_threshold": 114.41839599609375, |
|
"eval_allNLI-dev_manhattan_ap": 0.4272864144491257, |
|
"eval_allNLI-dev_manhattan_f1": 0.5384615384615384, |
|
"eval_allNLI-dev_manhattan_f1_threshold": 226.82566833496094, |
|
"eval_allNLI-dev_manhattan_precision": 0.3941018766756032, |
|
"eval_allNLI-dev_manhattan_recall": 0.8497109826589595, |
|
"eval_allNLI-dev_max_accuracy": 0.671875, |
|
"eval_allNLI-dev_max_accuracy_threshold": 518.88671875, |
|
"eval_allNLI-dev_max_ap": 0.4272864144491257, |
|
"eval_allNLI-dev_max_f1": 0.5404339250493098, |
|
"eval_allNLI-dev_max_f1_threshold": 323.9651184082031, |
|
"eval_allNLI-dev_max_precision": 0.4101796407185629, |
|
"eval_allNLI-dev_max_recall": 0.953757225433526, |
|
"eval_loss": 0.7178329229354858, |
|
"eval_runtime": 56.8366, |
|
"eval_samples_per_second": 26.497, |
|
"eval_sequential_score": 0.6679289689394285, |
|
"eval_steps_per_second": 0.211, |
|
"eval_sts-test_pearson_cosine": 0.2589065791031549, |
|
"eval_sts-test_pearson_dot": 0.28511212645281553, |
|
"eval_sts-test_pearson_euclidean": 0.2585939429800171, |
|
"eval_sts-test_pearson_manhattan": 0.27236487282828553, |
|
"eval_sts-test_pearson_max": 0.28511212645281553, |
|
"eval_sts-test_spearman_cosine": 0.31323211323674593, |
|
"eval_sts-test_spearman_dot": 0.2967423026930272, |
|
"eval_sts-test_spearman_euclidean": 0.2833925986586202, |
|
"eval_sts-test_spearman_manhattan": 0.29656486394161036, |
|
"eval_sts-test_spearman_max": 0.31323211323674593, |
|
"step": 55 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 546, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 55, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 640, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|