{
  "best_metric": 11.9196138381958,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 1.5444015444015444,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03088803088803089,
      "grad_norm": 0.06109972670674324,
      "learning_rate": 5e-05,
      "loss": 11.9221,
      "step": 1
    },
    {
      "epoch": 0.03088803088803089,
      "eval_loss": 11.928597450256348,
      "eval_runtime": 0.828,
      "eval_samples_per_second": 264.478,
      "eval_steps_per_second": 33.814,
      "step": 1
    },
    {
      "epoch": 0.06177606177606178,
      "grad_norm": 0.07205282151699066,
      "learning_rate": 0.0001,
      "loss": 11.926,
      "step": 2
    },
    {
      "epoch": 0.09266409266409266,
      "grad_norm": 0.0648089349269867,
      "learning_rate": 9.989294616193017e-05,
      "loss": 11.9224,
      "step": 3
    },
    {
      "epoch": 0.12355212355212356,
      "grad_norm": 0.08991728723049164,
      "learning_rate": 9.957224306869053e-05,
      "loss": 11.9314,
      "step": 4
    },
    {
      "epoch": 0.15444015444015444,
      "grad_norm": 0.10385900735855103,
      "learning_rate": 9.903926402016153e-05,
      "loss": 11.9305,
      "step": 5
    },
    {
      "epoch": 0.18532818532818532,
      "grad_norm": 0.08745633810758591,
      "learning_rate": 9.829629131445342e-05,
      "loss": 11.9325,
      "step": 6
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 0.09449993818998337,
      "learning_rate": 9.73465064747553e-05,
      "loss": 11.9301,
      "step": 7
    },
    {
      "epoch": 0.2471042471042471,
      "grad_norm": 0.11885674297809601,
      "learning_rate": 9.619397662556435e-05,
      "loss": 11.9337,
      "step": 8
    },
    {
      "epoch": 0.277992277992278,
      "grad_norm": 0.061852987855672836,
      "learning_rate": 9.484363707663442e-05,
      "loss": 11.9292,
      "step": 9
    },
    {
      "epoch": 0.3088803088803089,
      "grad_norm": 0.05906722694635391,
      "learning_rate": 9.330127018922194e-05,
      "loss": 11.9272,
      "step": 10
    },
    {
      "epoch": 0.33976833976833976,
      "grad_norm": 0.06637009233236313,
      "learning_rate": 9.157348061512727e-05,
      "loss": 11.9279,
      "step": 11
    },
    {
      "epoch": 0.37065637065637064,
      "grad_norm": 0.08344832062721252,
      "learning_rate": 8.966766701456177e-05,
      "loss": 11.9264,
      "step": 12
    },
    {
      "epoch": 0.4015444015444015,
      "grad_norm": 0.09188637137413025,
      "learning_rate": 8.759199037394887e-05,
      "loss": 11.9299,
      "step": 13
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 0.10101278871297836,
      "learning_rate": 8.535533905932738e-05,
      "loss": 11.9287,
      "step": 14
    },
    {
      "epoch": 0.46332046332046334,
      "grad_norm": 0.11991675198078156,
      "learning_rate": 8.296729075500344e-05,
      "loss": 11.9296,
      "step": 15
    },
    {
      "epoch": 0.4942084942084942,
      "grad_norm": 0.12269867956638336,
      "learning_rate": 8.043807145043604e-05,
      "loss": 11.9358,
      "step": 16
    },
    {
      "epoch": 0.525096525096525,
      "grad_norm": 0.06880807876586914,
      "learning_rate": 7.777851165098012e-05,
      "loss": 11.9237,
      "step": 17
    },
    {
      "epoch": 0.555984555984556,
      "grad_norm": 0.0790449008345604,
      "learning_rate": 7.500000000000001e-05,
      "loss": 11.9237,
      "step": 18
    },
    {
      "epoch": 0.5868725868725869,
      "grad_norm": 0.07787377387285233,
      "learning_rate": 7.211443451095007e-05,
      "loss": 11.927,
      "step": 19
    },
    {
      "epoch": 0.6177606177606177,
      "grad_norm": 0.10630682855844498,
      "learning_rate": 6.91341716182545e-05,
      "loss": 11.9244,
      "step": 20
    },
    {
      "epoch": 0.6486486486486487,
      "grad_norm": 0.1050586923956871,
      "learning_rate": 6.607197326515808e-05,
      "loss": 11.9249,
      "step": 21
    },
    {
      "epoch": 0.6795366795366795,
      "grad_norm": 0.12070325016975403,
      "learning_rate": 6.294095225512603e-05,
      "loss": 11.9281,
      "step": 22
    },
    {
      "epoch": 0.7104247104247104,
      "grad_norm": 0.11996880918741226,
      "learning_rate": 5.9754516100806423e-05,
      "loss": 11.9255,
      "step": 23
    },
    {
      "epoch": 0.7413127413127413,
      "grad_norm": 0.16234727203845978,
      "learning_rate": 5.6526309611002594e-05,
      "loss": 11.9262,
      "step": 24
    },
    {
      "epoch": 0.7722007722007722,
      "grad_norm": 0.07762131094932556,
      "learning_rate": 5.327015646150716e-05,
      "loss": 11.9224,
      "step": 25
    },
    {
      "epoch": 0.7722007722007722,
      "eval_loss": 11.924177169799805,
      "eval_runtime": 0.7805,
      "eval_samples_per_second": 280.597,
      "eval_steps_per_second": 35.875,
      "step": 25
    },
    {
      "epoch": 0.803088803088803,
      "grad_norm": 0.07824923098087311,
      "learning_rate": 5e-05,
      "loss": 11.9248,
      "step": 26
    },
    {
      "epoch": 0.833976833976834,
      "grad_norm": 0.1009729653596878,
      "learning_rate": 4.6729843538492847e-05,
      "loss": 11.9261,
      "step": 27
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.12158763408660889,
      "learning_rate": 4.347369038899744e-05,
      "loss": 11.9224,
      "step": 28
    },
    {
      "epoch": 0.8957528957528957,
      "grad_norm": 0.14848434925079346,
      "learning_rate": 4.0245483899193595e-05,
      "loss": 11.9251,
      "step": 29
    },
    {
      "epoch": 0.9266409266409267,
      "grad_norm": 0.15349432826042175,
      "learning_rate": 3.705904774487396e-05,
      "loss": 11.9235,
      "step": 30
    },
    {
      "epoch": 0.9575289575289575,
      "grad_norm": 0.1826583743095398,
      "learning_rate": 3.392802673484193e-05,
      "loss": 11.9222,
      "step": 31
    },
    {
      "epoch": 0.9884169884169884,
      "grad_norm": 0.16778188943862915,
      "learning_rate": 3.086582838174551e-05,
      "loss": 11.9317,
      "step": 32
    },
    {
      "epoch": 1.0193050193050193,
      "grad_norm": 0.1833105981349945,
      "learning_rate": 2.7885565489049946e-05,
      "loss": 19.424,
      "step": 33
    },
    {
      "epoch": 1.05019305019305,
      "grad_norm": 0.0869530737400055,
      "learning_rate": 2.500000000000001e-05,
      "loss": 12.095,
      "step": 34
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 0.11778981238603592,
      "learning_rate": 2.2221488349019903e-05,
      "loss": 11.902,
      "step": 35
    },
    {
      "epoch": 1.111969111969112,
      "grad_norm": 0.1222325935959816,
      "learning_rate": 1.9561928549563968e-05,
      "loss": 12.3136,
      "step": 36
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 0.14096081256866455,
      "learning_rate": 1.703270924499656e-05,
      "loss": 11.603,
      "step": 37
    },
    {
      "epoch": 1.1737451737451738,
      "grad_norm": 0.15266837179660797,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 11.896,
      "step": 38
    },
    {
      "epoch": 1.2046332046332047,
      "grad_norm": 0.15551863610744476,
      "learning_rate": 1.2408009626051137e-05,
      "loss": 11.6295,
      "step": 39
    },
    {
      "epoch": 1.2355212355212355,
      "grad_norm": 0.19857841730117798,
      "learning_rate": 1.0332332985438248e-05,
      "loss": 12.065,
      "step": 40
    },
    {
      "epoch": 1.2664092664092665,
      "grad_norm": 0.13945285975933075,
      "learning_rate": 8.426519384872733e-06,
      "loss": 11.2033,
      "step": 41
    },
    {
      "epoch": 1.2972972972972974,
      "grad_norm": 0.09375537186861038,
      "learning_rate": 6.698729810778065e-06,
      "loss": 12.0239,
      "step": 42
    },
    {
      "epoch": 1.3281853281853282,
      "grad_norm": 0.11622730642557144,
      "learning_rate": 5.156362923365588e-06,
      "loss": 13.0858,
      "step": 43
    },
    {
      "epoch": 1.359073359073359,
      "grad_norm": 0.13087835907936096,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 11.6548,
      "step": 44
    },
    {
      "epoch": 1.3899613899613898,
      "grad_norm": 0.17029710114002228,
      "learning_rate": 2.653493525244721e-06,
      "loss": 11.7197,
      "step": 45
    },
    {
      "epoch": 1.420849420849421,
      "grad_norm": 0.14172868430614471,
      "learning_rate": 1.70370868554659e-06,
      "loss": 11.5624,
      "step": 46
    },
    {
      "epoch": 1.4517374517374517,
      "grad_norm": 0.16212929785251617,
      "learning_rate": 9.607359798384785e-07,
      "loss": 11.9965,
      "step": 47
    },
    {
      "epoch": 1.4826254826254825,
      "grad_norm": 0.19312851130962372,
      "learning_rate": 4.277569313094809e-07,
      "loss": 12.248,
      "step": 48
    },
    {
      "epoch": 1.5135135135135136,
      "grad_norm": 0.13117873668670654,
      "learning_rate": 1.0705383806982606e-07,
      "loss": 10.6341,
      "step": 49
    },
    {
      "epoch": 1.5444015444015444,
      "grad_norm": 0.09648028761148453,
      "learning_rate": 0.0,
      "loss": 13.4304,
      "step": 50
    },
    {
      "epoch": 1.5444015444015444,
      "eval_loss": 11.9196138381958,
      "eval_runtime": 0.7842,
      "eval_samples_per_second": 279.277,
      "eval_steps_per_second": 35.707,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1042494259200.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}