|
{ |
|
"best_metric": 0.025447649881243706, |
|
"best_model_checkpoint": "/kaggle/working/output/checkpoint-98", |
|
"epoch": 32.857142857142854, |
|
"eval_steps": 500, |
|
"global_step": 115, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"eval_LCC": -0.13879293215538938, |
|
"eval_SROCC": -0.15851140456182472, |
|
"eval_loss": 0.26562368869781494, |
|
"eval_runtime": 38.0061, |
|
"eval_samples_per_second": 1.316, |
|
"eval_steps_per_second": 0.053, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_LCC": -0.12769050797605289, |
|
"eval_SROCC": -0.18098439375750297, |
|
"eval_loss": 0.06580950319766998, |
|
"eval_runtime": 35.9745, |
|
"eval_samples_per_second": 1.39, |
|
"eval_steps_per_second": 0.056, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 6.6160054206848145, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.2144, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"eval_LCC": -0.11849463789687457, |
|
"eval_SROCC": -0.16840336134453782, |
|
"eval_loss": 0.1341191679239273, |
|
"eval_runtime": 35.7755, |
|
"eval_samples_per_second": 1.398, |
|
"eval_steps_per_second": 0.056, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_LCC": -0.13792803325263353, |
|
"eval_SROCC": -0.23822328931572626, |
|
"eval_loss": 0.05623332038521767, |
|
"eval_runtime": 36.0015, |
|
"eval_samples_per_second": 1.389, |
|
"eval_steps_per_second": 0.056, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"eval_LCC": -0.14276549217386758, |
|
"eval_SROCC": -0.1657142857142857, |
|
"eval_loss": 0.061644963920116425, |
|
"eval_runtime": 35.7987, |
|
"eval_samples_per_second": 1.397, |
|
"eval_steps_per_second": 0.056, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 1.571254014968872, |
|
"learning_rate": 1.925925925925926e-05, |
|
"loss": 0.0575, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_LCC": -0.1185043581646345, |
|
"eval_SROCC": -0.15006002400960383, |
|
"eval_loss": 0.05936155468225479, |
|
"eval_runtime": 36.1077, |
|
"eval_samples_per_second": 1.385, |
|
"eval_steps_per_second": 0.055, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"eval_LCC": -0.11221146661280773, |
|
"eval_SROCC": -0.12038415366146459, |
|
"eval_loss": 0.05129233002662659, |
|
"eval_runtime": 35.8427, |
|
"eval_samples_per_second": 1.395, |
|
"eval_steps_per_second": 0.056, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_LCC": -0.09867320013209922, |
|
"eval_SROCC": -0.06929171668667466, |
|
"eval_loss": 0.041676923632621765, |
|
"eval_runtime": 36.0348, |
|
"eval_samples_per_second": 1.388, |
|
"eval_steps_per_second": 0.056, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 0.800278902053833, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.0201, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.857142857142858, |
|
"eval_LCC": -0.08130033550198683, |
|
"eval_SROCC": -0.07140456182472989, |
|
"eval_loss": 0.04058969393372536, |
|
"eval_runtime": 36.0799, |
|
"eval_samples_per_second": 1.386, |
|
"eval_steps_per_second": 0.055, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_LCC": -0.0384540491268753, |
|
"eval_SROCC": -0.02357743097238895, |
|
"eval_loss": 0.034295279532670975, |
|
"eval_runtime": 36.1741, |
|
"eval_samples_per_second": 1.382, |
|
"eval_steps_per_second": 0.055, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 10.857142857142858, |
|
"eval_LCC": 0.0010666390633979736, |
|
"eval_SROCC": 0.00744297719087635, |
|
"eval_loss": 0.031487837433815, |
|
"eval_runtime": 36.0877, |
|
"eval_samples_per_second": 1.386, |
|
"eval_steps_per_second": 0.055, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 0.3433558940887451, |
|
"learning_rate": 1.6296296296296297e-05, |
|
"loss": 0.0142, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_LCC": 0.03256796518969046, |
|
"eval_SROCC": 0.02175270108043217, |
|
"eval_loss": 0.03087497688829899, |
|
"eval_runtime": 35.9382, |
|
"eval_samples_per_second": 1.391, |
|
"eval_steps_per_second": 0.056, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 12.857142857142858, |
|
"eval_LCC": 0.04658237209190174, |
|
"eval_SROCC": 0.03807923169267707, |
|
"eval_loss": 0.030963044613599777, |
|
"eval_runtime": 36.1523, |
|
"eval_samples_per_second": 1.383, |
|
"eval_steps_per_second": 0.055, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_LCC": 0.06813425647249395, |
|
"eval_SROCC": 0.05354141656662665, |
|
"eval_loss": 0.029918596148490906, |
|
"eval_runtime": 35.716, |
|
"eval_samples_per_second": 1.4, |
|
"eval_steps_per_second": 0.056, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 0.8214556574821472, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.0097, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 14.857142857142858, |
|
"eval_LCC": 0.07492776273224191, |
|
"eval_SROCC": 0.06036014405762304, |
|
"eval_loss": 0.03141804039478302, |
|
"eval_runtime": 36.0969, |
|
"eval_samples_per_second": 1.385, |
|
"eval_steps_per_second": 0.055, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_LCC": 0.10494559746287975, |
|
"eval_SROCC": 0.07851140456182472, |
|
"eval_loss": 0.02881774678826332, |
|
"eval_runtime": 35.9818, |
|
"eval_samples_per_second": 1.39, |
|
"eval_steps_per_second": 0.056, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 16.857142857142858, |
|
"eval_LCC": 0.12693485096467225, |
|
"eval_SROCC": 0.09435774309723889, |
|
"eval_loss": 0.0282765943557024, |
|
"eval_runtime": 36.0917, |
|
"eval_samples_per_second": 1.385, |
|
"eval_steps_per_second": 0.055, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 0.5231106877326965, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0083, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_LCC": 0.14480085260022996, |
|
"eval_SROCC": 0.1022328931572629, |
|
"eval_loss": 0.0297652930021286, |
|
"eval_runtime": 35.9054, |
|
"eval_samples_per_second": 1.393, |
|
"eval_steps_per_second": 0.056, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 18.857142857142858, |
|
"eval_LCC": 0.16514532692258366, |
|
"eval_SROCC": 0.11183673469387755, |
|
"eval_loss": 0.02742738462984562, |
|
"eval_runtime": 35.7423, |
|
"eval_samples_per_second": 1.399, |
|
"eval_steps_per_second": 0.056, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.43896809220314026, |
|
"learning_rate": 1.1851851851851852e-05, |
|
"loss": 0.0063, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_LCC": 0.17032522192761249, |
|
"eval_SROCC": 0.12240096038415366, |
|
"eval_loss": 0.028603849932551384, |
|
"eval_runtime": 36.0386, |
|
"eval_samples_per_second": 1.387, |
|
"eval_steps_per_second": 0.055, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.857142857142858, |
|
"eval_LCC": 0.1833462608756279, |
|
"eval_SROCC": 0.13709483793517407, |
|
"eval_loss": 0.028336353600025177, |
|
"eval_runtime": 35.9704, |
|
"eval_samples_per_second": 1.39, |
|
"eval_steps_per_second": 0.056, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_LCC": 0.19435351727111327, |
|
"eval_SROCC": 0.13171668667466988, |
|
"eval_loss": 0.028169861063361168, |
|
"eval_runtime": 36.261, |
|
"eval_samples_per_second": 1.379, |
|
"eval_steps_per_second": 0.055, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 0.3503696024417877, |
|
"learning_rate": 1.037037037037037e-05, |
|
"loss": 0.0059, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"eval_LCC": 0.20354883288709705, |
|
"eval_SROCC": 0.13815126050420168, |
|
"eval_loss": 0.027723778039216995, |
|
"eval_runtime": 36.0717, |
|
"eval_samples_per_second": 1.386, |
|
"eval_steps_per_second": 0.055, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_LCC": 0.21464098120344227, |
|
"eval_SROCC": 0.1479471788715486, |
|
"eval_loss": 0.027026496827602386, |
|
"eval_runtime": 36.0759, |
|
"eval_samples_per_second": 1.386, |
|
"eval_steps_per_second": 0.055, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 24.857142857142858, |
|
"eval_LCC": 0.21974470798595805, |
|
"eval_SROCC": 0.1499639855942377, |
|
"eval_loss": 0.026259804144501686, |
|
"eval_runtime": 35.9282, |
|
"eval_samples_per_second": 1.392, |
|
"eval_steps_per_second": 0.056, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 0.24314194917678833, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.0046, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_LCC": 0.21990605203497599, |
|
"eval_SROCC": 0.13642256902761105, |
|
"eval_loss": 0.026854444295167923, |
|
"eval_runtime": 36.0574, |
|
"eval_samples_per_second": 1.387, |
|
"eval_steps_per_second": 0.055, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 26.857142857142858, |
|
"eval_LCC": 0.22513542090356625, |
|
"eval_SROCC": 0.1406482593037215, |
|
"eval_loss": 0.025891508907079697, |
|
"eval_runtime": 36.0239, |
|
"eval_samples_per_second": 1.388, |
|
"eval_steps_per_second": 0.056, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_LCC": 0.23080971037643985, |
|
"eval_SROCC": 0.15515006002400963, |
|
"eval_loss": 0.025447649881243706, |
|
"eval_runtime": 36.1194, |
|
"eval_samples_per_second": 1.384, |
|
"eval_steps_per_second": 0.055, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 0.2996827960014343, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0039, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 28.857142857142858, |
|
"eval_LCC": 0.22614882286734456, |
|
"eval_SROCC": 0.14804321728691477, |
|
"eval_loss": 0.026662170886993408, |
|
"eval_runtime": 35.7837, |
|
"eval_samples_per_second": 1.397, |
|
"eval_steps_per_second": 0.056, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_LCC": 0.22468420274739098, |
|
"eval_SROCC": 0.14890756302521005, |
|
"eval_loss": 0.02701684460043907, |
|
"eval_runtime": 35.9924, |
|
"eval_samples_per_second": 1.389, |
|
"eval_steps_per_second": 0.056, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 30.857142857142858, |
|
"eval_LCC": 0.2319344817291397, |
|
"eval_SROCC": 0.15764705882352942, |
|
"eval_loss": 0.026096588000655174, |
|
"eval_runtime": 35.7006, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 31.428571428571427, |
|
"grad_norm": 0.31328070163726807, |
|
"learning_rate": 5.925925925925926e-06, |
|
"loss": 0.0041, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_LCC": 0.23109019043125076, |
|
"eval_SROCC": 0.1630252100840336, |
|
"eval_loss": 0.026817049831151962, |
|
"eval_runtime": 36.031, |
|
"eval_samples_per_second": 1.388, |
|
"eval_steps_per_second": 0.056, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 32.857142857142854, |
|
"eval_LCC": 0.22573394181226267, |
|
"eval_SROCC": 0.1569747899159664, |
|
"eval_loss": 0.028198465704917908, |
|
"eval_runtime": 35.9924, |
|
"eval_samples_per_second": 1.389, |
|
"eval_steps_per_second": 0.056, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 32.857142857142854, |
|
"step": 115, |
|
"total_flos": 2.895570431785304e+18, |
|
"train_loss": 0.030544885701459388, |
|
"train_runtime": 6073.1055, |
|
"train_samples_per_second": 1.762, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.895570431785304e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|