{ "best_metric": 0.025447649881243706, "best_model_checkpoint": "/kaggle/working/output/checkpoint-98", "epoch": 32.857142857142854, "eval_steps": 500, "global_step": 115, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8571428571428571, "eval_LCC": -0.13879293215538938, "eval_SROCC": -0.15851140456182472, "eval_loss": 0.26562368869781494, "eval_runtime": 38.0061, "eval_samples_per_second": 1.316, "eval_steps_per_second": 0.053, "step": 3 }, { "epoch": 2.0, "eval_LCC": -0.12769050797605289, "eval_SROCC": -0.18098439375750297, "eval_loss": 0.06580950319766998, "eval_runtime": 35.9745, "eval_samples_per_second": 1.39, "eval_steps_per_second": 0.056, "step": 7 }, { "epoch": 2.857142857142857, "grad_norm": 6.6160054206848145, "learning_rate": 1.3333333333333333e-05, "loss": 0.2144, "step": 10 }, { "epoch": 2.857142857142857, "eval_LCC": -0.11849463789687457, "eval_SROCC": -0.16840336134453782, "eval_loss": 0.1341191679239273, "eval_runtime": 35.7755, "eval_samples_per_second": 1.398, "eval_steps_per_second": 0.056, "step": 10 }, { "epoch": 4.0, "eval_LCC": -0.13792803325263353, "eval_SROCC": -0.23822328931572626, "eval_loss": 0.05623332038521767, "eval_runtime": 36.0015, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.056, "step": 14 }, { "epoch": 4.857142857142857, "eval_LCC": -0.14276549217386758, "eval_SROCC": -0.1657142857142857, "eval_loss": 0.061644963920116425, "eval_runtime": 35.7987, "eval_samples_per_second": 1.397, "eval_steps_per_second": 0.056, "step": 17 }, { "epoch": 5.714285714285714, "grad_norm": 1.571254014968872, "learning_rate": 1.925925925925926e-05, "loss": 0.0575, "step": 20 }, { "epoch": 6.0, "eval_LCC": -0.1185043581646345, "eval_SROCC": -0.15006002400960383, "eval_loss": 0.05936155468225479, "eval_runtime": 36.1077, "eval_samples_per_second": 1.385, "eval_steps_per_second": 0.055, "step": 21 }, { "epoch": 6.857142857142857, "eval_LCC": -0.11221146661280773, "eval_SROCC": -0.12038415366146459, "eval_loss": 0.05129233002662659, "eval_runtime": 35.8427, "eval_samples_per_second": 1.395, "eval_steps_per_second": 0.056, "step": 24 }, { "epoch": 8.0, "eval_LCC": -0.09867320013209922, "eval_SROCC": -0.06929171668667466, "eval_loss": 0.041676923632621765, "eval_runtime": 36.0348, "eval_samples_per_second": 1.388, "eval_steps_per_second": 0.056, "step": 28 }, { "epoch": 8.571428571428571, "grad_norm": 0.800278902053833, "learning_rate": 1.7777777777777777e-05, "loss": 0.0201, "step": 30 }, { "epoch": 8.857142857142858, "eval_LCC": -0.08130033550198683, "eval_SROCC": -0.07140456182472989, "eval_loss": 0.04058969393372536, "eval_runtime": 36.0799, "eval_samples_per_second": 1.386, "eval_steps_per_second": 0.055, "step": 31 }, { "epoch": 10.0, "eval_LCC": -0.0384540491268753, "eval_SROCC": -0.02357743097238895, "eval_loss": 0.034295279532670975, "eval_runtime": 36.1741, "eval_samples_per_second": 1.382, "eval_steps_per_second": 0.055, "step": 35 }, { "epoch": 10.857142857142858, "eval_LCC": 0.0010666390633979736, "eval_SROCC": 0.00744297719087635, "eval_loss": 0.031487837433815, "eval_runtime": 36.0877, "eval_samples_per_second": 1.386, "eval_steps_per_second": 0.055, "step": 38 }, { "epoch": 11.428571428571429, "grad_norm": 0.3433558940887451, "learning_rate": 1.6296296296296297e-05, "loss": 0.0142, "step": 40 }, { "epoch": 12.0, "eval_LCC": 0.03256796518969046, "eval_SROCC": 0.02175270108043217, "eval_loss": 0.03087497688829899, "eval_runtime": 35.9382, "eval_samples_per_second": 1.391, "eval_steps_per_second": 0.056, "step": 42 }, { "epoch": 12.857142857142858, "eval_LCC": 0.04658237209190174, "eval_SROCC": 0.03807923169267707, "eval_loss": 0.030963044613599777, "eval_runtime": 36.1523, "eval_samples_per_second": 1.383, "eval_steps_per_second": 0.055, "step": 45 }, { "epoch": 14.0, "eval_LCC": 0.06813425647249395, "eval_SROCC": 0.05354141656662665, "eval_loss": 0.029918596148490906, "eval_runtime": 35.716, "eval_samples_per_second": 1.4, "eval_steps_per_second": 0.056, "step": 49 }, { "epoch": 14.285714285714286, "grad_norm": 0.8214556574821472, "learning_rate": 1.4814814814814815e-05, "loss": 0.0097, "step": 50 }, { "epoch": 14.857142857142858, "eval_LCC": 0.07492776273224191, "eval_SROCC": 0.06036014405762304, "eval_loss": 0.03141804039478302, "eval_runtime": 36.0969, "eval_samples_per_second": 1.385, "eval_steps_per_second": 0.055, "step": 52 }, { "epoch": 16.0, "eval_LCC": 0.10494559746287975, "eval_SROCC": 0.07851140456182472, "eval_loss": 0.02881774678826332, "eval_runtime": 35.9818, "eval_samples_per_second": 1.39, "eval_steps_per_second": 0.056, "step": 56 }, { "epoch": 16.857142857142858, "eval_LCC": 0.12693485096467225, "eval_SROCC": 0.09435774309723889, "eval_loss": 0.0282765943557024, "eval_runtime": 36.0917, "eval_samples_per_second": 1.385, "eval_steps_per_second": 0.055, "step": 59 }, { "epoch": 17.142857142857142, "grad_norm": 0.5231106877326965, "learning_rate": 1.3333333333333333e-05, "loss": 0.0083, "step": 60 }, { "epoch": 18.0, "eval_LCC": 0.14480085260022996, "eval_SROCC": 0.1022328931572629, "eval_loss": 0.0297652930021286, "eval_runtime": 35.9054, "eval_samples_per_second": 1.393, "eval_steps_per_second": 0.056, "step": 63 }, { "epoch": 18.857142857142858, "eval_LCC": 0.16514532692258366, "eval_SROCC": 0.11183673469387755, "eval_loss": 0.02742738462984562, "eval_runtime": 35.7423, "eval_samples_per_second": 1.399, "eval_steps_per_second": 0.056, "step": 66 }, { "epoch": 20.0, "grad_norm": 0.43896809220314026, "learning_rate": 1.1851851851851852e-05, "loss": 0.0063, "step": 70 }, { "epoch": 20.0, "eval_LCC": 0.17032522192761249, "eval_SROCC": 0.12240096038415366, "eval_loss": 0.028603849932551384, "eval_runtime": 36.0386, "eval_samples_per_second": 1.387, "eval_steps_per_second": 0.055, "step": 70 }, { "epoch": 20.857142857142858, "eval_LCC": 0.1833462608756279, "eval_SROCC": 0.13709483793517407, "eval_loss": 0.028336353600025177, "eval_runtime": 35.9704, "eval_samples_per_second": 1.39, "eval_steps_per_second": 0.056, "step": 73 }, { "epoch": 22.0, "eval_LCC": 0.19435351727111327, "eval_SROCC": 0.13171668667466988, "eval_loss": 0.028169861063361168, "eval_runtime": 36.261, "eval_samples_per_second": 1.379, "eval_steps_per_second": 0.055, "step": 77 }, { "epoch": 22.857142857142858, "grad_norm": 0.3503696024417877, "learning_rate": 1.037037037037037e-05, "loss": 0.0059, "step": 80 }, { "epoch": 22.857142857142858, "eval_LCC": 0.20354883288709705, "eval_SROCC": 0.13815126050420168, "eval_loss": 0.027723778039216995, "eval_runtime": 36.0717, "eval_samples_per_second": 1.386, "eval_steps_per_second": 0.055, "step": 80 }, { "epoch": 24.0, "eval_LCC": 0.21464098120344227, "eval_SROCC": 0.1479471788715486, "eval_loss": 0.027026496827602386, "eval_runtime": 36.0759, "eval_samples_per_second": 1.386, "eval_steps_per_second": 0.055, "step": 84 }, { "epoch": 24.857142857142858, "eval_LCC": 0.21974470798595805, "eval_SROCC": 0.1499639855942377, "eval_loss": 0.026259804144501686, "eval_runtime": 35.9282, "eval_samples_per_second": 1.392, "eval_steps_per_second": 0.056, "step": 87 }, { "epoch": 25.714285714285715, "grad_norm": 0.24314194917678833, "learning_rate": 8.888888888888888e-06, "loss": 0.0046, "step": 90 }, { "epoch": 26.0, "eval_LCC": 0.21990605203497599, "eval_SROCC": 0.13642256902761105, "eval_loss": 0.026854444295167923, "eval_runtime": 36.0574, "eval_samples_per_second": 1.387, "eval_steps_per_second": 0.055, "step": 91 }, { "epoch": 26.857142857142858, "eval_LCC": 0.22513542090356625, "eval_SROCC": 0.1406482593037215, "eval_loss": 0.025891508907079697, "eval_runtime": 36.0239, "eval_samples_per_second": 1.388, "eval_steps_per_second": 0.056, "step": 94 }, { "epoch": 28.0, "eval_LCC": 0.23080971037643985, "eval_SROCC": 0.15515006002400963, "eval_loss": 0.025447649881243706, "eval_runtime": 36.1194, "eval_samples_per_second": 1.384, "eval_steps_per_second": 0.055, "step": 98 }, { "epoch": 28.571428571428573, "grad_norm": 0.2996827960014343, "learning_rate": 7.4074074074074075e-06, "loss": 0.0039, "step": 100 }, { "epoch": 28.857142857142858, "eval_LCC": 0.22614882286734456, "eval_SROCC": 0.14804321728691477, "eval_loss": 0.026662170886993408, "eval_runtime": 35.7837, "eval_samples_per_second": 1.397, "eval_steps_per_second": 0.056, "step": 101 }, { "epoch": 30.0, "eval_LCC": 0.22468420274739098, "eval_SROCC": 0.14890756302521005, "eval_loss": 0.02701684460043907, "eval_runtime": 35.9924, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.056, "step": 105 }, { "epoch": 30.857142857142858, "eval_LCC": 0.2319344817291397, "eval_SROCC": 0.15764705882352942, "eval_loss": 0.026096588000655174, "eval_runtime": 35.7006, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 108 }, { "epoch": 31.428571428571427, "grad_norm": 0.31328070163726807, "learning_rate": 5.925925925925926e-06, "loss": 0.0041, "step": 110 }, { "epoch": 32.0, "eval_LCC": 0.23109019043125076, "eval_SROCC": 0.1630252100840336, "eval_loss": 0.026817049831151962, "eval_runtime": 36.031, "eval_samples_per_second": 1.388, "eval_steps_per_second": 0.056, "step": 112 }, { "epoch": 32.857142857142854, "eval_LCC": 0.22573394181226267, "eval_SROCC": 0.1569747899159664, "eval_loss": 0.028198465704917908, "eval_runtime": 35.9924, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.056, "step": 115 }, { "epoch": 32.857142857142854, "step": 115, "total_flos": 2.895570431785304e+18, "train_loss": 0.030544885701459388, "train_runtime": 6073.1055, "train_samples_per_second": 1.762, "train_steps_per_second": 0.025 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.895570431785304e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }