|
{ |
|
"best_metric": 1.0107625722885132, |
|
"best_model_checkpoint": "/data/Andre/Ref-Finder-Mistral/checkpoint-3170", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 3170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.031545741324921134, |
|
"grad_norm": 0.5029881000518799, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7154, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06309148264984227, |
|
"grad_norm": 0.3064497709274292, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5963, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0946372239747634, |
|
"grad_norm": 0.3118360638618469, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5074, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12618296529968454, |
|
"grad_norm": 0.33069083094596863, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4047, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15772870662460567, |
|
"grad_norm": 0.2797032296657562, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4167, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1892744479495268, |
|
"grad_norm": 0.3190701901912689, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3361, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22082018927444794, |
|
"grad_norm": 0.3070685863494873, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2655, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25236593059936907, |
|
"grad_norm": 0.3203960359096527, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2295, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28391167192429023, |
|
"grad_norm": 0.30132830142974854, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2277, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31545741324921134, |
|
"grad_norm": 0.3356678783893585, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1848, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3470031545741325, |
|
"grad_norm": 0.3275781273841858, |
|
"learning_rate": 5e-05, |
|
"loss": 1.183, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3785488958990536, |
|
"grad_norm": 0.30640777945518494, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1488, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.41009463722397477, |
|
"grad_norm": 0.5068441033363342, |
|
"learning_rate": 5e-05, |
|
"loss": 1.147, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.4416403785488959, |
|
"grad_norm": 0.310285747051239, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1908, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.47318611987381703, |
|
"grad_norm": 0.38677722215652466, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1319, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5047318611987381, |
|
"grad_norm": 0.3474641740322113, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1215, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5362776025236593, |
|
"grad_norm": 0.37211593985557556, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1503, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5678233438485805, |
|
"grad_norm": 0.3207016885280609, |
|
"learning_rate": 5e-05, |
|
"loss": 1.182, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5993690851735016, |
|
"grad_norm": 0.34209126234054565, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1298, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6309148264984227, |
|
"grad_norm": 0.3956719934940338, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1443, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6624605678233438, |
|
"grad_norm": 0.34193623065948486, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1148, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.694006309148265, |
|
"grad_norm": 0.3550577759742737, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1091, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7255520504731862, |
|
"grad_norm": 0.34275463223457336, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1185, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7570977917981072, |
|
"grad_norm": 0.36972326040267944, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1388, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7886435331230284, |
|
"grad_norm": 0.36260902881622314, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0982, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8201892744479495, |
|
"grad_norm": 0.35559672117233276, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1032, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8517350157728707, |
|
"grad_norm": 0.3544253706932068, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1239, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8832807570977917, |
|
"grad_norm": 0.3803843855857849, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1006, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9148264984227129, |
|
"grad_norm": 0.3776736855506897, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1001, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.9463722397476341, |
|
"grad_norm": 0.4238007068634033, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0968, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9779179810725552, |
|
"grad_norm": 0.4062643051147461, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1188, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.1006102561950684, |
|
"eval_runtime": 66.4381, |
|
"eval_samples_per_second": 4.786, |
|
"eval_steps_per_second": 0.602, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.0094637223974763, |
|
"grad_norm": 0.3485482633113861, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0843, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0410094637223974, |
|
"grad_norm": 0.40558719635009766, |
|
"learning_rate": 5e-05, |
|
"loss": 1.097, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.0725552050473186, |
|
"grad_norm": 0.4074763059616089, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0697, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.1041009463722398, |
|
"grad_norm": 0.40961453318595886, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0635, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.135646687697161, |
|
"grad_norm": 0.3752257227897644, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0936, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.167192429022082, |
|
"grad_norm": 0.3867760896682739, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0782, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.1987381703470033, |
|
"grad_norm": 0.4072268307209015, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0574, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.2302839116719242, |
|
"grad_norm": 0.3942580819129944, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1081, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.2618296529968454, |
|
"grad_norm": 0.4262318015098572, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0821, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2933753943217665, |
|
"grad_norm": 0.39012083411216736, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0712, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.3249211356466877, |
|
"grad_norm": 0.4160712659358978, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1106, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.3564668769716088, |
|
"grad_norm": 0.3966641128063202, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0411, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.38801261829653, |
|
"grad_norm": 0.3720882833003998, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0815, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.4195583596214512, |
|
"grad_norm": 0.396207332611084, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0462, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4511041009463723, |
|
"grad_norm": 0.38164132833480835, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0891, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.4826498422712935, |
|
"grad_norm": 0.38896164298057556, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0905, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.5141955835962144, |
|
"grad_norm": 0.4327830374240875, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1205, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.5457413249211358, |
|
"grad_norm": 0.423364520072937, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0477, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.5772870662460567, |
|
"grad_norm": 0.4212876558303833, |
|
"learning_rate": 5e-05, |
|
"loss": 1.12, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.608832807570978, |
|
"grad_norm": 0.3814271092414856, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0695, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.640378548895899, |
|
"grad_norm": 0.3973582983016968, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0832, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.6719242902208202, |
|
"grad_norm": 0.4016555845737457, |
|
"learning_rate": 5e-05, |
|
"loss": 1.077, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.7034700315457414, |
|
"grad_norm": 0.4084228575229645, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0674, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.7350157728706623, |
|
"grad_norm": 0.4218040406703949, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0464, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.7665615141955837, |
|
"grad_norm": 0.3857240080833435, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0656, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.7981072555205047, |
|
"grad_norm": 0.3926863968372345, |
|
"learning_rate": 5e-05, |
|
"loss": 1.056, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.8296529968454258, |
|
"grad_norm": 0.4352160096168518, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0443, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.861198738170347, |
|
"grad_norm": 0.4079754650592804, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0502, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.8927444794952681, |
|
"grad_norm": 0.40210971236228943, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0613, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9242902208201893, |
|
"grad_norm": 0.3993563950061798, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0341, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.9558359621451105, |
|
"grad_norm": 0.47853732109069824, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0071, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.9873817034700316, |
|
"grad_norm": 0.42926380038261414, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0383, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.0646495819091797, |
|
"eval_runtime": 66.4837, |
|
"eval_samples_per_second": 4.783, |
|
"eval_steps_per_second": 0.602, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.0189274447949526, |
|
"grad_norm": 0.3886430561542511, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0667, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.050473186119874, |
|
"grad_norm": 0.4253116846084595, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0323, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.082018927444795, |
|
"grad_norm": 0.40994375944137573, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0027, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.1135646687697163, |
|
"grad_norm": 0.3847936689853668, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0022, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.145110410094637, |
|
"grad_norm": 0.43215593695640564, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0564, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.176656151419558, |
|
"grad_norm": 0.4463648498058319, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0277, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.2082018927444795, |
|
"grad_norm": 0.42896410822868347, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0466, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.2397476340694005, |
|
"grad_norm": 0.4028797149658203, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0588, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.271293375394322, |
|
"grad_norm": 0.4177733361721039, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0519, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.302839116719243, |
|
"grad_norm": 0.42829203605651855, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0202, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.334384858044164, |
|
"grad_norm": 0.5054190158843994, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9972, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.365930599369085, |
|
"grad_norm": 0.4306070804595947, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0412, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.3974763406940065, |
|
"grad_norm": 0.443590372800827, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0424, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.4290220820189274, |
|
"grad_norm": 0.4287286400794983, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0331, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.4605678233438484, |
|
"grad_norm": 0.39775350689888, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0454, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.4921135646687698, |
|
"grad_norm": 0.4093973636627197, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0442, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.5236593059936907, |
|
"grad_norm": 0.45389777421951294, |
|
"learning_rate": 5e-05, |
|
"loss": 1.024, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.555205047318612, |
|
"grad_norm": 0.428648442029953, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0407, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.586750788643533, |
|
"grad_norm": 0.41237714886665344, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0159, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.6182965299684544, |
|
"grad_norm": 0.42067545652389526, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0347, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.6498422712933754, |
|
"grad_norm": 0.4184909462928772, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0337, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.6813880126182967, |
|
"grad_norm": 0.414995014667511, |
|
"learning_rate": 5e-05, |
|
"loss": 1.092, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.7129337539432177, |
|
"grad_norm": 0.4137355089187622, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0514, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.7444794952681386, |
|
"grad_norm": 0.45818576216697693, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0225, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.77602523659306, |
|
"grad_norm": 0.455785870552063, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0483, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.807570977917981, |
|
"grad_norm": 0.4084894061088562, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9846, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.8391167192429023, |
|
"grad_norm": 0.4103436768054962, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0217, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.8706624605678233, |
|
"grad_norm": 0.40420758724212646, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0399, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.9022082018927446, |
|
"grad_norm": 0.5487234592437744, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0394, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.9337539432176656, |
|
"grad_norm": 0.47695016860961914, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0445, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.965299684542587, |
|
"grad_norm": 0.41771531105041504, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0377, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.996845425867508, |
|
"grad_norm": 0.5724055767059326, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0193, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.0459696054458618, |
|
"eval_runtime": 66.4844, |
|
"eval_samples_per_second": 4.783, |
|
"eval_steps_per_second": 0.602, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 3.028391167192429, |
|
"grad_norm": 0.481629878282547, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9837, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.0599369085173502, |
|
"grad_norm": 0.42061686515808105, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0083, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.091482649842271, |
|
"grad_norm": 0.4234108030796051, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0249, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.1230283911671926, |
|
"grad_norm": 0.43123263120651245, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0319, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.1545741324921135, |
|
"grad_norm": 0.4268761873245239, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0067, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.186119873817035, |
|
"grad_norm": 0.41744470596313477, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0316, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.217665615141956, |
|
"grad_norm": 0.46088990569114685, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9993, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.249211356466877, |
|
"grad_norm": 0.43155333399772644, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0275, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.280757097791798, |
|
"grad_norm": 0.4405035972595215, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0014, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.312302839116719, |
|
"grad_norm": 0.466680645942688, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0066, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.3438485804416405, |
|
"grad_norm": 0.4462493360042572, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0081, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.3753943217665614, |
|
"grad_norm": 0.4766935706138611, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9957, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.406940063091483, |
|
"grad_norm": 0.4287005364894867, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0022, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.4384858044164037, |
|
"grad_norm": 0.43795284628868103, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0248, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.470031545741325, |
|
"grad_norm": 0.4681282937526703, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0241, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.501577287066246, |
|
"grad_norm": 0.44735008478164673, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0209, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.5331230283911674, |
|
"grad_norm": 0.4473140835762024, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9824, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.5646687697160884, |
|
"grad_norm": 0.44602036476135254, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0095, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.5962145110410093, |
|
"grad_norm": 0.455937922000885, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0045, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.6277602523659307, |
|
"grad_norm": 0.416535347700119, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0293, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.6593059936908516, |
|
"grad_norm": 0.454054057598114, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9761, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.690851735015773, |
|
"grad_norm": 0.4191015958786011, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0275, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.722397476340694, |
|
"grad_norm": 0.45472997426986694, |
|
"learning_rate": 5e-05, |
|
"loss": 0.975, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.753943217665615, |
|
"grad_norm": 0.429548442363739, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9638, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.7854889589905363, |
|
"grad_norm": 0.4479614198207855, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0034, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.8170347003154577, |
|
"grad_norm": 0.41878965497016907, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0102, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.8485804416403786, |
|
"grad_norm": 0.42527589201927185, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9746, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.8801261829652995, |
|
"grad_norm": 0.4646793007850647, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0139, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.911671924290221, |
|
"grad_norm": 0.41096052527427673, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0247, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.943217665615142, |
|
"grad_norm": 0.4595187306404114, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0149, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.9747634069400632, |
|
"grad_norm": 0.4228056073188782, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0199, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.032894492149353, |
|
"eval_runtime": 66.5042, |
|
"eval_samples_per_second": 4.782, |
|
"eval_steps_per_second": 0.601, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 4.006309148264984, |
|
"grad_norm": 0.4469398558139801, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9636, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.037854889589905, |
|
"grad_norm": 0.4484340250492096, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9827, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.069400630914826, |
|
"grad_norm": 0.4563854932785034, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9877, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.100946372239748, |
|
"grad_norm": 0.44243761897087097, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9872, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.132492113564669, |
|
"grad_norm": 0.448011189699173, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0118, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 4.16403785488959, |
|
"grad_norm": 0.4259743094444275, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0109, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 4.195583596214511, |
|
"grad_norm": 0.456064909696579, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9552, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 4.2271293375394325, |
|
"grad_norm": 0.49178850650787354, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9976, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 4.2586750788643535, |
|
"grad_norm": 0.4512215852737427, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9889, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.290220820189274, |
|
"grad_norm": 0.4504569172859192, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9675, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 4.321766561514195, |
|
"grad_norm": 0.4347565472126007, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9904, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 4.353312302839116, |
|
"grad_norm": 0.4649258852005005, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9832, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 4.384858044164038, |
|
"grad_norm": 0.4316873252391815, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9952, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 4.416403785488959, |
|
"grad_norm": 0.4411141872406006, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9743, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.44794952681388, |
|
"grad_norm": 0.46868711709976196, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9737, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 4.479495268138801, |
|
"grad_norm": 0.47713035345077515, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9646, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 4.511041009463723, |
|
"grad_norm": 0.4720157980918884, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9645, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 4.542586750788644, |
|
"grad_norm": 0.4508207440376282, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9669, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 4.574132492113565, |
|
"grad_norm": 0.4645206928253174, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9945, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.605678233438486, |
|
"grad_norm": 0.45657721161842346, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0193, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 4.6372239747634065, |
|
"grad_norm": 0.48605337738990784, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9796, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 4.668769716088328, |
|
"grad_norm": 0.4564870595932007, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0164, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.700315457413249, |
|
"grad_norm": 0.46090081334114075, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9854, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 4.73186119873817, |
|
"grad_norm": 0.4782868027687073, |
|
"learning_rate": 5e-05, |
|
"loss": 0.985, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.763406940063091, |
|
"grad_norm": 0.45532533526420593, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9555, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 4.794952681388013, |
|
"grad_norm": 0.4831511676311493, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9775, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.826498422712934, |
|
"grad_norm": 0.4660089612007141, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9805, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.858044164037855, |
|
"grad_norm": 0.47603532671928406, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0222, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.889589905362776, |
|
"grad_norm": 0.4162875711917877, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9867, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.921135646687697, |
|
"grad_norm": 0.4378200173377991, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9762, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.952681388012619, |
|
"grad_norm": 0.43556976318359375, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9557, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 4.9842271293375395, |
|
"grad_norm": 0.4165530204772949, |
|
"learning_rate": 5e-05, |
|
"loss": 0.998, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.02390456199646, |
|
"eval_runtime": 66.4974, |
|
"eval_samples_per_second": 4.782, |
|
"eval_steps_per_second": 0.602, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 5.0157728706624605, |
|
"grad_norm": 0.557310938835144, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9705, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.047318611987381, |
|
"grad_norm": 0.47156888246536255, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9629, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.078864353312303, |
|
"grad_norm": 0.51046222448349, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9429, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 5.110410094637224, |
|
"grad_norm": 0.48319852352142334, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9637, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 5.141955835962145, |
|
"grad_norm": 0.45673197507858276, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9233, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 5.173501577287066, |
|
"grad_norm": 0.5032113194465637, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9486, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 5.205047318611987, |
|
"grad_norm": 0.449439138174057, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9107, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 5.236593059936909, |
|
"grad_norm": 0.4683469831943512, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9608, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 5.26813880126183, |
|
"grad_norm": 0.48362118005752563, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9246, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 5.299684542586751, |
|
"grad_norm": 0.4709579050540924, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9958, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 5.331230283911672, |
|
"grad_norm": 0.4630713164806366, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9837, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 5.3627760252365935, |
|
"grad_norm": 0.475508451461792, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0084, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.394321766561514, |
|
"grad_norm": 0.5352875590324402, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9595, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 5.425867507886435, |
|
"grad_norm": 0.5087634325027466, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9697, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 5.457413249211356, |
|
"grad_norm": 0.4558835029602051, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9609, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 5.488958990536277, |
|
"grad_norm": 0.5090092420578003, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9732, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 5.520504731861199, |
|
"grad_norm": 0.48192793130874634, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9917, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 5.55205047318612, |
|
"grad_norm": 0.4428229033946991, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9607, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 5.583596214511041, |
|
"grad_norm": 0.4858005940914154, |
|
"learning_rate": 5e-05, |
|
"loss": 0.994, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 5.615141955835962, |
|
"grad_norm": 0.4797442555427551, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9554, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 5.646687697160884, |
|
"grad_norm": 0.4797378480434418, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9486, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 5.678233438485805, |
|
"grad_norm": 0.4509980082511902, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9693, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.709779179810726, |
|
"grad_norm": 0.45232152938842773, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9622, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 5.7413249211356465, |
|
"grad_norm": 0.49943023920059204, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0051, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 5.7728706624605675, |
|
"grad_norm": 0.4827818274497986, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9536, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 5.804416403785489, |
|
"grad_norm": 0.4689510464668274, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9706, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 5.83596214511041, |
|
"grad_norm": 0.47188493609428406, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9582, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 5.867507886435331, |
|
"grad_norm": 0.47195523977279663, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9688, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 5.899053627760252, |
|
"grad_norm": 0.4700336158275604, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9399, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 5.930599369085174, |
|
"grad_norm": 0.5036072731018066, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9726, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 5.962145110410095, |
|
"grad_norm": 0.5032414197921753, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9426, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 5.993690851735016, |
|
"grad_norm": 0.4505554139614105, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9911, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.0179320573806763, |
|
"eval_runtime": 66.4482, |
|
"eval_samples_per_second": 4.786, |
|
"eval_steps_per_second": 0.602, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 6.025236593059937, |
|
"grad_norm": 0.48737627267837524, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9504, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 6.056782334384858, |
|
"grad_norm": 0.520263135433197, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9411, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.0883280757097795, |
|
"grad_norm": 0.4799466133117676, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9448, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 6.1198738170347005, |
|
"grad_norm": 0.49849933385849, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9511, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 6.151419558359621, |
|
"grad_norm": 0.4995006322860718, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9315, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 6.182965299684542, |
|
"grad_norm": 0.5434730648994446, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9509, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 6.214511041009464, |
|
"grad_norm": 0.5055322647094727, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9449, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 6.246056782334385, |
|
"grad_norm": 0.4768029749393463, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9356, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 6.277602523659306, |
|
"grad_norm": 0.5039747357368469, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9478, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 6.309148264984227, |
|
"grad_norm": 0.5042532086372375, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8941, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.340694006309148, |
|
"grad_norm": 0.5117079615592957, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9081, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 6.37223974763407, |
|
"grad_norm": 0.5625054836273193, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9588, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 6.403785488958991, |
|
"grad_norm": 0.49397581815719604, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9405, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 6.435331230283912, |
|
"grad_norm": 0.5129591226577759, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9357, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 6.466876971608833, |
|
"grad_norm": 0.5299010276794434, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9425, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 6.498422712933754, |
|
"grad_norm": 0.512342095375061, |
|
"learning_rate": 5e-05, |
|
"loss": 0.936, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 6.529968454258675, |
|
"grad_norm": 0.5136451721191406, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9549, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 6.561514195583596, |
|
"grad_norm": 0.6025319695472717, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9705, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 6.593059936908517, |
|
"grad_norm": 0.48766204714775085, |
|
"learning_rate": 5e-05, |
|
"loss": 0.96, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 6.624605678233438, |
|
"grad_norm": 0.4721720516681671, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9457, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.65615141955836, |
|
"grad_norm": 0.48331397771835327, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9105, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 6.687697160883281, |
|
"grad_norm": 0.4890565872192383, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9859, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 6.719242902208202, |
|
"grad_norm": 0.5263992547988892, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9659, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 6.750788643533123, |
|
"grad_norm": 0.45187363028526306, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9319, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 6.782334384858045, |
|
"grad_norm": 0.4888645112514496, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9623, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 6.813880126182966, |
|
"grad_norm": 0.48433786630630493, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9601, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 6.8454258675078865, |
|
"grad_norm": 0.5414565205574036, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9381, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 6.8769716088328074, |
|
"grad_norm": 0.47471919655799866, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9443, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 6.908517350157728, |
|
"grad_norm": 0.4787106513977051, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9331, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 6.94006309148265, |
|
"grad_norm": 0.4515725076198578, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9461, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.971608832807571, |
|
"grad_norm": 0.4714019000530243, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9587, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.0130518674850464, |
|
"eval_runtime": 66.5051, |
|
"eval_samples_per_second": 4.782, |
|
"eval_steps_per_second": 0.601, |
|
"step": 2219 |
|
}, |
|
{ |
|
"epoch": 7.003154574132492, |
|
"grad_norm": 0.44392409920692444, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9691, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 7.034700315457413, |
|
"grad_norm": 0.538865864276886, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9095, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 7.066246056782334, |
|
"grad_norm": 0.5173049569129944, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9281, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 7.097791798107256, |
|
"grad_norm": 0.4751831293106079, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9094, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 7.129337539432177, |
|
"grad_norm": 0.5221697092056274, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9148, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 7.160883280757098, |
|
"grad_norm": 0.5088801383972168, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9383, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 7.192429022082019, |
|
"grad_norm": 0.5191715359687805, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9187, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 7.2239747634069404, |
|
"grad_norm": 0.5438238382339478, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9192, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 7.255520504731861, |
|
"grad_norm": 0.5197346210479736, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9226, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.287066246056782, |
|
"grad_norm": 0.5286086797714233, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9009, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 7.318611987381703, |
|
"grad_norm": 0.4977555274963379, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9524, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 7.350157728706624, |
|
"grad_norm": 0.5014932751655579, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9356, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 7.381703470031546, |
|
"grad_norm": 0.5207954049110413, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9095, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 7.413249211356467, |
|
"grad_norm": 0.512366771697998, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9357, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 7.444794952681388, |
|
"grad_norm": 0.5742561221122742, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9188, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 7.476340694006309, |
|
"grad_norm": 0.5032497644424438, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9624, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 7.50788643533123, |
|
"grad_norm": 0.6190054416656494, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8954, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 7.539432176656152, |
|
"grad_norm": 0.5226176977157593, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9212, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 7.570977917981073, |
|
"grad_norm": 0.5045409202575684, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9459, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.6025236593059935, |
|
"grad_norm": 0.48342952132225037, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9306, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 7.634069400630915, |
|
"grad_norm": 0.48288047313690186, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9217, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 7.665615141955836, |
|
"grad_norm": 0.5119076371192932, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9594, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 7.697160883280757, |
|
"grad_norm": 0.5182865262031555, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9158, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 7.728706624605678, |
|
"grad_norm": 0.5085521340370178, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9249, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 7.760252365930599, |
|
"grad_norm": 0.49291595816612244, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9128, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 7.79179810725552, |
|
"grad_norm": 0.5067439675331116, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8993, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 7.823343848580442, |
|
"grad_norm": 0.49475356936454773, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9313, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 7.854889589905363, |
|
"grad_norm": 0.5028258562088013, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9459, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 7.886435331230284, |
|
"grad_norm": 0.482112854719162, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9243, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.917981072555205, |
|
"grad_norm": 0.5285838842391968, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9847, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 7.9495268138801265, |
|
"grad_norm": 0.5217479467391968, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9172, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 7.981072555205047, |
|
"grad_norm": 0.47651416063308716, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9003, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.0111174583435059, |
|
"eval_runtime": 66.5497, |
|
"eval_samples_per_second": 4.778, |
|
"eval_steps_per_second": 0.601, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 8.012618296529968, |
|
"grad_norm": 0.5173642635345459, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9408, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 8.04416403785489, |
|
"grad_norm": 0.5017074346542358, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9028, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 8.07570977917981, |
|
"grad_norm": 0.5437857508659363, |
|
"learning_rate": 5e-05, |
|
"loss": 0.916, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 8.107255520504731, |
|
"grad_norm": 0.485762357711792, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8698, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 8.138801261829652, |
|
"grad_norm": 0.5231262445449829, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9004, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 8.170347003154575, |
|
"grad_norm": 0.49633580446243286, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9159, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 8.201892744479496, |
|
"grad_norm": 0.5477921366691589, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9015, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.233438485804417, |
|
"grad_norm": 0.5651286840438843, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8925, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 8.264984227129338, |
|
"grad_norm": 0.5210168957710266, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9172, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 8.296529968454259, |
|
"grad_norm": 0.5071650743484497, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9172, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 8.32807570977918, |
|
"grad_norm": 0.5585223436355591, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9234, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 8.3596214511041, |
|
"grad_norm": 0.5303429961204529, |
|
"learning_rate": 5e-05, |
|
"loss": 0.896, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 8.391167192429021, |
|
"grad_norm": 0.5033040046691895, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9431, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 8.422712933753942, |
|
"grad_norm": 0.4882967472076416, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9103, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 8.454258675078865, |
|
"grad_norm": 0.5938067436218262, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9067, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 8.485804416403786, |
|
"grad_norm": 0.5606987476348877, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9177, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 8.517350157728707, |
|
"grad_norm": 0.5056515336036682, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8924, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.548895899053628, |
|
"grad_norm": 0.5208995938301086, |
|
"learning_rate": 5e-05, |
|
"loss": 0.917, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 8.580441640378549, |
|
"grad_norm": 0.575134813785553, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9132, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 8.61198738170347, |
|
"grad_norm": 0.5263710021972656, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9162, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 8.64353312302839, |
|
"grad_norm": 0.5916036367416382, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9147, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 8.675078864353312, |
|
"grad_norm": 0.5610800981521606, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9022, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 8.706624605678233, |
|
"grad_norm": 0.5309184193611145, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8736, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 8.738170347003155, |
|
"grad_norm": 0.5035881996154785, |
|
"learning_rate": 5e-05, |
|
"loss": 0.898, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 8.769716088328076, |
|
"grad_norm": 0.5445141196250916, |
|
"learning_rate": 5e-05, |
|
"loss": 0.903, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 8.801261829652997, |
|
"grad_norm": 0.5459301471710205, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9124, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 8.832807570977918, |
|
"grad_norm": 0.5099250078201294, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9132, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.864353312302839, |
|
"grad_norm": 0.5143303275108337, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9085, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 8.89589905362776, |
|
"grad_norm": 0.5371480584144592, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9463, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 8.927444794952681, |
|
"grad_norm": 0.517353892326355, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8911, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 8.958990536277602, |
|
"grad_norm": 0.5601980090141296, |
|
"learning_rate": 5e-05, |
|
"loss": 0.915, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 8.990536277602523, |
|
"grad_norm": 0.5473778247833252, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9254, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.0110243558883667, |
|
"eval_runtime": 66.5208, |
|
"eval_samples_per_second": 4.78, |
|
"eval_steps_per_second": 0.601, |
|
"step": 2853 |
|
}, |
|
{ |
|
"epoch": 9.022082018927446, |
|
"grad_norm": 0.5774141550064087, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8731, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 9.053627760252366, |
|
"grad_norm": 0.5381526350975037, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9125, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 9.085173501577287, |
|
"grad_norm": 0.5414624810218811, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8838, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 9.116719242902208, |
|
"grad_norm": 0.526127815246582, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8709, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 9.14826498422713, |
|
"grad_norm": 0.5719351768493652, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8976, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 9.17981072555205, |
|
"grad_norm": 0.6119252443313599, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9006, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 9.211356466876971, |
|
"grad_norm": 0.5286473035812378, |
|
"learning_rate": 5e-05, |
|
"loss": 0.873, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 9.242902208201892, |
|
"grad_norm": 0.5602397918701172, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9113, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 9.274447949526813, |
|
"grad_norm": 0.5757038593292236, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8967, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 9.305993690851736, |
|
"grad_norm": 0.5797662138938904, |
|
"learning_rate": 5e-05, |
|
"loss": 0.921, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 9.337539432176657, |
|
"grad_norm": 0.5598446726799011, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9121, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 9.369085173501578, |
|
"grad_norm": 0.5119657516479492, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8748, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 9.400630914826499, |
|
"grad_norm": 0.5484170317649841, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8971, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 9.43217665615142, |
|
"grad_norm": 0.5351391434669495, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8466, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 9.46372239747634, |
|
"grad_norm": 0.5337589979171753, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8986, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.495268138801261, |
|
"grad_norm": 0.5773183703422546, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9001, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 9.526813880126182, |
|
"grad_norm": 0.6163984537124634, |
|
"learning_rate": 5e-05, |
|
"loss": 0.902, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 9.558359621451103, |
|
"grad_norm": 0.5879700183868408, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8855, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 9.589905362776026, |
|
"grad_norm": 0.5596455335617065, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9052, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 9.621451104100947, |
|
"grad_norm": 0.5862036943435669, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9092, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 9.652996845425868, |
|
"grad_norm": 0.5491965413093567, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8887, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 9.684542586750789, |
|
"grad_norm": 0.5651736259460449, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8873, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 9.71608832807571, |
|
"grad_norm": 0.5439329147338867, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8871, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 9.74763406940063, |
|
"grad_norm": 0.5257729887962341, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8711, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 9.779179810725552, |
|
"grad_norm": 0.5310476422309875, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9118, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.810725552050473, |
|
"grad_norm": 0.5593333840370178, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8647, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 9.842271293375394, |
|
"grad_norm": 0.5591513514518738, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8788, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 9.873817034700316, |
|
"grad_norm": 0.5862401723861694, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9113, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 9.905362776025237, |
|
"grad_norm": 0.5847012996673584, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9228, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 9.936908517350158, |
|
"grad_norm": 0.5507489442825317, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9114, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 9.968454258675079, |
|
"grad_norm": 0.5988635420799255, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8753, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.6517265439033508, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8532, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.0107625722885132, |
|
"eval_runtime": 66.5594, |
|
"eval_samples_per_second": 4.778, |
|
"eval_steps_per_second": 0.601, |
|
"step": 3170 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1112958196396851e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|