{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 50, "global_step": 90, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.725662708282471, "learning_rate": 3.3333333333333333e-06, "loss": 0.2963, "step": 6 }, { "epoch": 1.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.30786412954330444, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.30786412954330444, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 172.38189697265625, "eval_custom-arc-semantics-data-jp_dot_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 172.38189697265625, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.785310745239258, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.785310745239258, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 612.4951171875, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.6612959956709956, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 612.4951171875, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 612.4951171875, "eval_custom-arc-semantics-data-jp_max_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 612.4951171875, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.3111322224140167, "eval_runtime": 6.0296, "eval_samples_per_second": 1.824, "eval_steps_per_second": 0.332, "step": 6 }, { "epoch": 2.0, "grad_norm": 4.223657608032227, "learning_rate": 6.666666666666667e-06, "loss": 0.2833, "step": 12 }, { "epoch": 2.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.3102341890335083, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.3102341890335083, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 173.5974578857422, "eval_custom-arc-semantics-data-jp_dot_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 173.5974578857422, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.73798179626465, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.73798179626465, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 610.9443359375, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 610.9443359375, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 610.9443359375, "eval_custom-arc-semantics-data-jp_max_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 610.9443359375, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.309558629989624, "eval_runtime": 5.9488, "eval_samples_per_second": 1.849, "eval_steps_per_second": 0.336, "step": 12 }, { "epoch": 3.0, "grad_norm": 3.3170320987701416, "learning_rate": 1e-05, "loss": 0.2568, "step": 18 }, { "epoch": 3.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.3211533725261688, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.3211533725261688, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 179.75201416015625, "eval_custom-arc-semantics-data-jp_dot_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 179.75201416015625, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.536266326904297, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.536266326904297, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 605.5822143554688, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 605.5822143554688, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 605.5822143554688, "eval_custom-arc-semantics-data-jp_max_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 605.5822143554688, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.304984450340271, "eval_runtime": 7.2979, "eval_samples_per_second": 1.507, "eval_steps_per_second": 0.274, "step": 18 }, { "epoch": 4.0, "grad_norm": 2.2900643348693848, "learning_rate": 1.3333333333333333e-05, "loss": 0.2177, "step": 24 }, { "epoch": 4.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.3480110168457031, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.3480110168457031, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 195.35403442382812, "eval_custom-arc-semantics-data-jp_dot_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 195.35403442382812, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.041318893432617, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.041318893432617, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 593.150634765625, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 593.150634765625, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 593.150634765625, "eval_custom-arc-semantics-data-jp_max_ap": 0.7237959956709956, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 593.150634765625, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.29575565457344055, "eval_runtime": 6.0903, "eval_samples_per_second": 1.806, "eval_steps_per_second": 0.328, "step": 24 }, { "epoch": 5.0, "grad_norm": 1.5022671222686768, "learning_rate": 1.6666666666666667e-05, "loss": 0.1797, "step": 30 }, { "epoch": 5.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.3965001106262207, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.3965001106262207, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 223.7809295654297, "eval_custom-arc-semantics-data-jp_dot_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 223.7809295654297, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 26.097108840942383, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 26.097108840942383, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 570.329345703125, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.682129329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 570.329345703125, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 570.329345703125, "eval_custom-arc-semantics-data-jp_max_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 570.329345703125, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.28263404965400696, "eval_runtime": 6.0031, "eval_samples_per_second": 1.832, "eval_steps_per_second": 0.333, "step": 30 }, { "epoch": 6.0, "grad_norm": 0.6781304478645325, "learning_rate": 2e-05, "loss": 0.1419, "step": 36 }, { "epoch": 6.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.43997177481651306, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.43997177481651306, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 249.57528686523438, "eval_custom-arc-semantics-data-jp_dot_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 249.57528686523438, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 25.200597763061523, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 25.200597763061523, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 550.5452880859375, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 550.5452880859375, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 550.5452880859375, "eval_custom-arc-semantics-data-jp_max_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 550.5452880859375, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.276452898979187, "eval_runtime": 5.4943, "eval_samples_per_second": 2.002, "eval_steps_per_second": 0.364, "step": 36 }, { "epoch": 7.0, "grad_norm": 0.4487136900424957, "learning_rate": 1.7777777777777777e-05, "loss": 0.1057, "step": 42 }, { "epoch": 7.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.37805190682411194, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.37805190682411194, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 211.4693603515625, "eval_custom-arc-semantics-data-jp_dot_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 211.4693603515625, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 26.396636962890625, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 26.396636962890625, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 582.5875244140625, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 582.5875244140625, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 582.5875244140625, "eval_custom-arc-semantics-data-jp_max_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 582.5875244140625, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.295420378446579, "eval_runtime": 6.1313, "eval_samples_per_second": 1.794, "eval_steps_per_second": 0.326, "step": 42 }, { "epoch": 8.0, "grad_norm": 0.16805221140384674, "learning_rate": 1.555555555555556e-05, "loss": 0.0815, "step": 48 }, { "epoch": 8.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.31827351450920105, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.31827351450920105, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 178.09083557128906, "eval_custom-arc-semantics-data-jp_dot_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 178.09083557128906, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.63851547241211, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.63851547241211, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 612.4237060546875, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 612.4237060546875, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 612.4237060546875, "eval_custom-arc-semantics-data-jp_max_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 612.4237060546875, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.3164561986923218, "eval_runtime": 6.5067, "eval_samples_per_second": 1.691, "eval_steps_per_second": 0.307, "step": 48 }, { "epoch": 9.0, "grad_norm": 0.16789764165878296, "learning_rate": 1.3333333333333333e-05, "loss": 0.0664, "step": 54 }, { "epoch": 9.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.30298155546188354, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.30298155546188354, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 169.24606323242188, "eval_custom-arc-semantics-data-jp_dot_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 169.24606323242188, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.91897964477539, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.91897964477539, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 618.7899169921875, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 618.7899169921875, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 618.7899169921875, "eval_custom-arc-semantics-data-jp_max_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 618.7899169921875, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.31994009017944336, "eval_runtime": 6.8319, "eval_samples_per_second": 1.61, "eval_steps_per_second": 0.293, "step": 54 }, { "epoch": 10.0, "grad_norm": 0.1519097536802292, "learning_rate": 1.1111111111111113e-05, "loss": 0.0497, "step": 60 }, { "epoch": 10.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.31682828068733215, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.31682828068733215, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 176.45004272460938, "eval_custom-arc-semantics-data-jp_dot_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 176.45004272460938, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.598316192626953, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.598316192626953, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 611.2236328125, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 611.2236328125, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 611.2236328125, "eval_custom-arc-semantics-data-jp_max_ap": 0.650879329004329, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 611.2236328125, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.31398141384124756, "eval_runtime": 6.4168, "eval_samples_per_second": 1.714, "eval_steps_per_second": 0.312, "step": 60 }, { "epoch": 11.0, "grad_norm": 0.17287826538085938, "learning_rate": 8.888888888888888e-06, "loss": 0.0402, "step": 66 }, { "epoch": 11.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.33553600311279297, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.33553600311279297, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 186.30569458007812, "eval_custom-arc-semantics-data-jp_dot_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 186.30569458007812, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.178071975708008, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.178071975708008, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 601.0965576171875, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 601.0965576171875, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 601.0965576171875, "eval_custom-arc-semantics-data-jp_max_ap": 0.6321293290043289, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 601.0965576171875, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.3081442415714264, "eval_runtime": 6.4435, "eval_samples_per_second": 1.707, "eval_steps_per_second": 0.31, "step": 66 }, { "epoch": 12.0, "grad_norm": 0.1389617770910263, "learning_rate": 6.666666666666667e-06, "loss": 0.0346, "step": 72 }, { "epoch": 12.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.3391532003879547, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.3391532003879547, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 187.9766082763672, "eval_custom-arc-semantics-data-jp_dot_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 187.9766082763672, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.080123901367188, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.080123901367188, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 598.4072265625, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 598.4072265625, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 598.4072265625, "eval_custom-arc-semantics-data-jp_max_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 598.4072265625, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.30722811818122864, "eval_runtime": 6.555, "eval_samples_per_second": 1.678, "eval_steps_per_second": 0.305, "step": 72 }, { "epoch": 13.0, "grad_norm": 0.10099858045578003, "learning_rate": 4.444444444444444e-06, "loss": 0.0293, "step": 78 }, { "epoch": 13.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.34109100699424744, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.34109100699424744, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 189.05935668945312, "eval_custom-arc-semantics-data-jp_dot_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 189.05935668945312, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.041587829589844, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.041587829589844, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 597.2989501953125, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 597.2989501953125, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 597.2989501953125, "eval_custom-arc-semantics-data-jp_max_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 597.2989501953125, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.30660775303840637, "eval_runtime": 5.9875, "eval_samples_per_second": 1.837, "eval_steps_per_second": 0.334, "step": 78 }, { "epoch": 14.0, "grad_norm": 0.12931989133358002, "learning_rate": 2.222222222222222e-06, "loss": 0.0302, "step": 84 }, { "epoch": 14.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.3385917544364929, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.3385917544364929, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 187.77444458007812, "eval_custom-arc-semantics-data-jp_dot_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 187.77444458007812, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.100971221923828, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.100971221923828, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 598.5726318359375, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 598.5726318359375, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 598.5726318359375, "eval_custom-arc-semantics-data-jp_max_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 598.5726318359375, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.30758312344551086, "eval_runtime": 5.1895, "eval_samples_per_second": 2.12, "eval_steps_per_second": 0.385, "step": 84 }, { "epoch": 15.0, "grad_norm": 0.16915012896060944, "learning_rate": 0.0, "loss": 0.0287, "step": 90 }, { "epoch": 15.0, "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.3379952907562256, "eval_custom-arc-semantics-data-jp_cosine_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_cosine_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.3379952907562256, "eval_custom-arc-semantics-data-jp_cosine_precision": 0.7, "eval_custom-arc-semantics-data-jp_cosine_recall": 0.875, "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 187.5118865966797, "eval_custom-arc-semantics-data-jp_dot_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_dot_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 187.5118865966797, "eval_custom-arc-semantics-data-jp_dot_precision": 0.7, "eval_custom-arc-semantics-data-jp_dot_recall": 0.875, "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 27.118305206298828, "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 27.118305206298828, "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.7, "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.875, "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 598.9317626953125, "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.619629329004329, "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 598.9317626953125, "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.7, "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.875, "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6363636363636364, "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 598.9317626953125, "eval_custom-arc-semantics-data-jp_max_ap": 0.6946293290043289, "eval_custom-arc-semantics-data-jp_max_f1": 0.7777777777777777, "eval_custom-arc-semantics-data-jp_max_f1_threshold": 598.9317626953125, "eval_custom-arc-semantics-data-jp_max_precision": 0.7, "eval_custom-arc-semantics-data-jp_max_recall": 0.875, "eval_loss": 0.3078480362892151, "eval_runtime": 5.9867, "eval_samples_per_second": 1.837, "eval_steps_per_second": 0.334, "step": 90 } ], "logging_steps": 500, "max_steps": 90, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }