|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 10.0, |
|
"global_step": 1384, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999355950970494, |
|
"loss": 0.5182, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997424135721297, |
|
"loss": 0.2324, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994205549599399, |
|
"loss": 0.2389, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989701850946613, |
|
"loss": 0.2291, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983915360245138, |
|
"loss": 0.2182, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004977613181928558, |
|
"loss": 0.2245, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004969398145204346, |
|
"loss": 0.2189, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004959910777697026, |
|
"loss": 0.2148, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004949155967670468, |
|
"loss": 0.2242, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004937139256424639, |
|
"loss": 0.2113, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004923866835440515, |
|
"loss": 0.2212, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004909345543189974, |
|
"loss": 0.212, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004893582861612366, |
|
"loss": 0.2013, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00048765869122595047, |
|
"loss": 0.2228, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00048583664521111415, |
|
"loss": 0.1779, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00048389308690630165, |
|
"loss": 0.2137, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00048182901770898496, |
|
"loss": 0.1894, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004796455011085747, |
|
"loss": 0.2175, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00047734366213846903, |
|
"loss": 0.1969, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00047492468679639156, |
|
"loss": 0.1976, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00047238982143331946, |
|
"loss": 0.2004, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0004697403721113144, |
|
"loss": 0.1829, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000466977703930588, |
|
"loss": 0.2145, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004641032403261489, |
|
"loss": 0.2168, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00046111846233439283, |
|
"loss": 0.1924, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00045802490783001485, |
|
"loss": 0.2023, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00045482417073363604, |
|
"loss": 0.2061, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004515179001905528, |
|
"loss": 0.2002, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000448107799721033, |
|
"loss": 0.194, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00044459562634259475, |
|
"loss": 0.1921, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004409831896647228, |
|
"loss": 0.1975, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00043727235095648647, |
|
"loss": 0.1995, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004334650221875406, |
|
"loss": 0.2173, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00042956316504300416, |
|
"loss": 0.2032, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004255687899127229, |
|
"loss": 0.2031, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00042148395485543767, |
|
"loss": 0.2004, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000417310764538392, |
|
"loss": 0.1966, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00041305136915292486, |
|
"loss": 0.2355, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004087079633066076, |
|
"loss": 0.1976, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004042827848924964, |
|
"loss": 0.214, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00039977811393608143, |
|
"loss": 0.1978, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0003951962714205291, |
|
"loss": 0.1958, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0003905396180908197, |
|
"loss": 0.2032, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00038581055323739946, |
|
"loss": 0.1839, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00038101151345997175, |
|
"loss": 0.1954, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0003761449714120656, |
|
"loss": 0.1991, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0003712134345270275, |
|
"loss": 0.1984, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0003662194437260931, |
|
"loss": 0.1895, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00036116557210920554, |
|
"loss": 0.1876, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00035605442362925284, |
|
"loss": 0.1896, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00035088863175040946, |
|
"loss": 0.1932, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0003456708580912725, |
|
"loss": 0.2007, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00034040379105349086, |
|
"loss": 0.1815, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0003350901444365959, |
|
"loss": 0.1707, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0003297326560397451, |
|
"loss": 0.2185, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0003243340862511003, |
|
"loss": 0.2049, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00031889721662556813, |
|
"loss": 0.2334, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0003134248484516332, |
|
"loss": 0.2091, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00030791980130802485, |
|
"loss": 0.1785, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00030238491161095913, |
|
"loss": 0.1961, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002968230311527065, |
|
"loss": 0.185, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0002912370256322358, |
|
"loss": 0.1823, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00028562977317869454, |
|
"loss": 0.1937, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00028000416286848355, |
|
"loss": 0.2217, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002743630932366912, |
|
"loss": 0.204, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002687094707836551, |
|
"loss": 0.1993, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002630462084774183, |
|
"loss": 0.2013, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00025737622425285454, |
|
"loss": 0.1956, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002517024395082337, |
|
"loss": 0.208, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0002460277776000023, |
|
"loss": 0.1845, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00024035516233655632, |
|
"loss": 0.1921, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00023468751647177984, |
|
"loss": 0.1736, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0002290277601991279, |
|
"loss": 0.1844, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00022337880964702823, |
|
"loss": 0.1933, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00021774357537637746, |
|
"loss": 0.1919, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00021212496088090602, |
|
"loss": 0.2008, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00020652586109118432, |
|
"loss": 0.1894, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0002009491608830409, |
|
"loss": 0.1817, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001953977335911613, |
|
"loss": 0.1812, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018987443952863336, |
|
"loss": 0.1731, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018438212451320137, |
|
"loss": 0.1848, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001789236184009898, |
|
"loss": 0.1894, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00017350173362844999, |
|
"loss": 0.1861, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00016811926376328256, |
|
"loss": 0.1775, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00016277898206508199, |
|
"loss": 0.1872, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00015748364005644422, |
|
"loss": 0.1867, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00015223596610527455, |
|
"loss": 0.1809, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00014703866401902528, |
|
"loss": 0.1743, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00014189441165158822, |
|
"loss": 0.1978, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0001368058595235591, |
|
"loss": 0.1825, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00013177562945658578, |
|
"loss": 0.1857, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00012680631322250236, |
|
"loss": 0.1898, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00012190047120794725, |
|
"loss": 0.1915, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00011706063109515111, |
|
"loss": 0.1728, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00011228928655957607, |
|
"loss": 0.1759, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00010758889598507615, |
|
"loss": 0.1817, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00010296188119724162, |
|
"loss": 0.1835, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 9.841062621557937e-05, |
|
"loss": 0.2016, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.393747602517259e-05, |
|
"loss": 0.2042, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.954473536845239e-05, |
|
"loss": 0.1779, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.523466755770443e-05, |
|
"loss": 0.1868, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.100949330892093e-05, |
|
"loss": 0.1878, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 7.687138959760159e-05, |
|
"loss": 0.1826, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.282248853708981e-05, |
|
"loss": 0.1798, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 6.886487628002441e-05, |
|
"loss": 0.1655, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 6.500059194347213e-05, |
|
"loss": 0.1846, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.123162655829426e-05, |
|
"loss": 0.1731, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5.755992204328969e-05, |
|
"loss": 0.1691, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5.3987370204642003e-05, |
|
"loss": 0.1933, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5.051581176118689e-05, |
|
"loss": 0.1969, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.7147035396001405e-05, |
|
"loss": 0.1949, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.388277683480446e-05, |
|
"loss": 0.1897, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.072471795164279e-05, |
|
"loss": 0.1776, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.767448590232342e-05, |
|
"loss": 0.1833, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.473365228603928e-05, |
|
"loss": 0.1782, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.190373233561955e-05, |
|
"loss": 0.1888, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.9186184136822392e-05, |
|
"loss": 0.1747, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.6582407877071836e-05, |
|
"loss": 0.2005, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.4093745124026402e-05, |
|
"loss": 0.1848, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1721478134350798e-05, |
|
"loss": 0.1769, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.946682919304693e-05, |
|
"loss": 0.186, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.7330959983684863e-05, |
|
"loss": 0.1715, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.5314970989857735e-05, |
|
"loss": 0.1694, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.3419900928169498e-05, |
|
"loss": 0.1777, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.1646726213047437e-05, |
|
"loss": 0.1734, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 9.996360453655068e-06, |
|
"loss": 0.1989, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 8.469653983164933e-06, |
|
"loss": 0.1897, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.067393420633589e-06, |
|
"loss": 0.1898, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5.790301265704539e-06, |
|
"loss": 0.1856, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 4.639035526348145e-06, |
|
"loss": 0.1787, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.6141893798301293e-06, |
|
"loss": 0.1827, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.7162908670833596e-06, |
|
"loss": 0.1816, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.9458026206404245e-06, |
|
"loss": 0.1751, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.3031216262671675e-06, |
|
"loss": 0.1862, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.885790184201935e-07, |
|
"loss": 0.1938, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.024399096332898e-07, |
|
"loss": 0.1696, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.4490325392102488e-07, |
|
"loss": 0.1761, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6101744269997332e-08, |
|
"loss": 0.1897, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1384, |
|
"total_flos": 3.2554020600775967e+18, |
|
"train_loss": 0.19606802009605948, |
|
"train_runtime": 24012.3724, |
|
"train_samples_per_second": 3.688, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1384, |
|
"num_train_epochs": 2, |
|
"save_steps": 150, |
|
"total_flos": 3.2554020600775967e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|