|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.113015284854099, |
|
"eval_steps": 200, |
|
"global_step": 600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018527095877721167, |
|
"grad_norm": 5.348576545715332, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 1.7335, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.037054191755442334, |
|
"grad_norm": 5.819892883300781, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 1.7063, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0555812876331635, |
|
"grad_norm": 2.0946009159088135, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 1.5389, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07410838351088467, |
|
"grad_norm": 6.124255657196045, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 1.9017, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09263547938860583, |
|
"grad_norm": 4.986006736755371, |
|
"learning_rate": 6.17283950617284e-06, |
|
"loss": 1.4697, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.111162575266327, |
|
"grad_norm": 3.69557785987854, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 1.2454, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12968967114404817, |
|
"grad_norm": 4.338206768035889, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 1.2242, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14821676702176934, |
|
"grad_norm": 2.3303167819976807, |
|
"learning_rate": 9.876543209876543e-06, |
|
"loss": 0.7272, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1667438628994905, |
|
"grad_norm": 2.708115339279175, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 0.5907, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18527095877721167, |
|
"grad_norm": 1.4415699243545532, |
|
"learning_rate": 1.234567901234568e-05, |
|
"loss": 0.6154, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20379805465493284, |
|
"grad_norm": 1.7580137252807617, |
|
"learning_rate": 1.3580246913580248e-05, |
|
"loss": 0.4568, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.222325150532654, |
|
"grad_norm": 2.5971596240997314, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.4862, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24085224641037517, |
|
"grad_norm": 1.3559226989746094, |
|
"learning_rate": 1.6049382716049385e-05, |
|
"loss": 0.4366, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.25937934228809634, |
|
"grad_norm": 0.6591945290565491, |
|
"learning_rate": 1.728395061728395e-05, |
|
"loss": 0.3757, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2779064381658175, |
|
"grad_norm": 2.574704170227051, |
|
"learning_rate": 1.851851851851852e-05, |
|
"loss": 0.4693, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.29643353404353867, |
|
"grad_norm": 3.002263307571411, |
|
"learning_rate": 1.9753086419753087e-05, |
|
"loss": 0.4896, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.31496062992125984, |
|
"grad_norm": 3.431332588195801, |
|
"learning_rate": 1.999850819197622e-05, |
|
"loss": 0.4864, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.333487725798981, |
|
"grad_norm": 1.1350328922271729, |
|
"learning_rate": 1.99924484847108e-05, |
|
"loss": 0.3713, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.35201482167670217, |
|
"grad_norm": 1.6894770860671997, |
|
"learning_rate": 1.9981730462964303e-05, |
|
"loss": 0.4814, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.37054191755442334, |
|
"grad_norm": 1.3769453763961792, |
|
"learning_rate": 1.9966359123301492e-05, |
|
"loss": 0.4288, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3890690134321445, |
|
"grad_norm": 1.1856595277786255, |
|
"learning_rate": 1.9946341631587086e-05, |
|
"loss": 0.4447, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4075961093098657, |
|
"grad_norm": 1.7599550485610962, |
|
"learning_rate": 1.9921687319645183e-05, |
|
"loss": 0.349, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.42612320518758684, |
|
"grad_norm": 1.5848398208618164, |
|
"learning_rate": 1.9892407680908904e-05, |
|
"loss": 0.396, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.444650301065308, |
|
"grad_norm": 1.9259053468704224, |
|
"learning_rate": 1.9858516365062334e-05, |
|
"loss": 0.3352, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4631773969430292, |
|
"grad_norm": 1.4261807203292847, |
|
"learning_rate": 1.9820029171677288e-05, |
|
"loss": 0.3511, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.48170449282075034, |
|
"grad_norm": 1.699010968208313, |
|
"learning_rate": 1.977696404284779e-05, |
|
"loss": 0.4073, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5002315886984715, |
|
"grad_norm": 1.3403549194335938, |
|
"learning_rate": 1.9729341054825783e-05, |
|
"loss": 0.4454, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5187586845761927, |
|
"grad_norm": 1.2229658365249634, |
|
"learning_rate": 1.9677182408661894e-05, |
|
"loss": 0.4352, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5372857804539138, |
|
"grad_norm": 2.2487080097198486, |
|
"learning_rate": 1.9620512419855684e-05, |
|
"loss": 0.392, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.555812876331635, |
|
"grad_norm": 2.6429977416992188, |
|
"learning_rate": 1.9559357507020163e-05, |
|
"loss": 0.4013, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5743399722093562, |
|
"grad_norm": 2.240354061126709, |
|
"learning_rate": 1.9493746179565854e-05, |
|
"loss": 0.4111, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5928670680870773, |
|
"grad_norm": 1.2388675212860107, |
|
"learning_rate": 1.94237090244102e-05, |
|
"loss": 0.3653, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6113941639647985, |
|
"grad_norm": 2.2535054683685303, |
|
"learning_rate": 1.9349278691718426e-05, |
|
"loss": 0.3956, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6299212598425197, |
|
"grad_norm": 2.5032520294189453, |
|
"learning_rate": 1.9270489879682592e-05, |
|
"loss": 0.3697, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6484483557202408, |
|
"grad_norm": 2.4367105960845947, |
|
"learning_rate": 1.9187379318345845e-05, |
|
"loss": 0.4188, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.666975451597962, |
|
"grad_norm": 2.6159491539001465, |
|
"learning_rate": 1.9099985752479505e-05, |
|
"loss": 0.4415, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.6855025474756832, |
|
"grad_norm": 1.8182092905044556, |
|
"learning_rate": 1.900834992352087e-05, |
|
"loss": 0.3273, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7040296433534043, |
|
"grad_norm": 3.406963348388672, |
|
"learning_rate": 1.8912514550580242e-05, |
|
"loss": 0.4069, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7225567392311255, |
|
"grad_norm": 1.886953353881836, |
|
"learning_rate": 1.881252431052599e-05, |
|
"loss": 0.3452, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7410838351088467, |
|
"grad_norm": 2.346081018447876, |
|
"learning_rate": 1.870842581715691e-05, |
|
"loss": 0.2954, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7596109309865678, |
|
"grad_norm": 1.7905707359313965, |
|
"learning_rate": 1.8600267599471663e-05, |
|
"loss": 0.344, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.778138026864289, |
|
"grad_norm": 1.5074595212936401, |
|
"learning_rate": 1.8488100079045345e-05, |
|
"loss": 0.4834, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7966651227420102, |
|
"grad_norm": 2.0368354320526123, |
|
"learning_rate": 1.8371975546523795e-05, |
|
"loss": 0.4263, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8151922186197313, |
|
"grad_norm": 2.4880967140197754, |
|
"learning_rate": 1.825194813724654e-05, |
|
"loss": 0.2868, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8337193144974525, |
|
"grad_norm": 1.4323982000350952, |
|
"learning_rate": 1.81280738060098e-05, |
|
"loss": 0.3404, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8522464103751737, |
|
"grad_norm": 1.955913782119751, |
|
"learning_rate": 1.8000410300981305e-05, |
|
"loss": 0.329, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.8707735062528948, |
|
"grad_norm": 2.0698235034942627, |
|
"learning_rate": 1.786901713677902e-05, |
|
"loss": 0.3959, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.889300602130616, |
|
"grad_norm": 2.9616572856903076, |
|
"learning_rate": 1.7733955566726438e-05, |
|
"loss": 0.3973, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9078276980083372, |
|
"grad_norm": 3.0657591819763184, |
|
"learning_rate": 1.7595288554297295e-05, |
|
"loss": 0.4035, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9263547938860583, |
|
"grad_norm": 1.5825896263122559, |
|
"learning_rate": 1.7453080743763e-05, |
|
"loss": 0.3797, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9448818897637795, |
|
"grad_norm": 1.8893063068389893, |
|
"learning_rate": 1.7307398430056595e-05, |
|
"loss": 0.2627, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9634089856415007, |
|
"grad_norm": 1.5115277767181396, |
|
"learning_rate": 1.7158309527867117e-05, |
|
"loss": 0.281, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.9819360815192218, |
|
"grad_norm": 4.204952239990234, |
|
"learning_rate": 1.700588353997891e-05, |
|
"loss": 0.4839, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.001852709587772, |
|
"grad_norm": 3.4891834259033203, |
|
"learning_rate": 1.6850191524870548e-05, |
|
"loss": 0.4232, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.0203798054654933, |
|
"grad_norm": 2.1796462535858154, |
|
"learning_rate": 1.6691306063588583e-05, |
|
"loss": 0.3583, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.0389069013432144, |
|
"grad_norm": 1.7773243188858032, |
|
"learning_rate": 1.6529301225911433e-05, |
|
"loss": 0.3135, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.0574339972209357, |
|
"grad_norm": 1.6911367177963257, |
|
"learning_rate": 1.6364252535819284e-05, |
|
"loss": 0.2577, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.0759610930986567, |
|
"grad_norm": 1.934979796409607, |
|
"learning_rate": 1.619623693628605e-05, |
|
"loss": 0.2957, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.094488188976378, |
|
"grad_norm": 2.352208137512207, |
|
"learning_rate": 1.602533275340984e-05, |
|
"loss": 0.3576, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.113015284854099, |
|
"grad_norm": 1.4006640911102295, |
|
"learning_rate": 1.5851619659898623e-05, |
|
"loss": 0.3574, |
|
"step": 600 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1617, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.3080296690801705e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|