RepairLLaMA / trainer_state.json
andre15silva's picture
up
f46a625 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 10.0,
"global_step": 1384,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 0.0004999355950970494,
"loss": 0.5182,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 0.0004997424135721297,
"loss": 0.2324,
"step": 20
},
{
"epoch": 0.04,
"learning_rate": 0.0004994205549599399,
"loss": 0.2389,
"step": 30
},
{
"epoch": 0.06,
"learning_rate": 0.0004989701850946613,
"loss": 0.2291,
"step": 40
},
{
"epoch": 0.07,
"learning_rate": 0.0004983915360245138,
"loss": 0.2182,
"step": 50
},
{
"epoch": 0.09,
"learning_rate": 0.0004977613181928558,
"loss": 0.2245,
"step": 60
},
{
"epoch": 0.1,
"learning_rate": 0.0004969398145204346,
"loss": 0.2189,
"step": 70
},
{
"epoch": 0.12,
"learning_rate": 0.0004959910777697026,
"loss": 0.2148,
"step": 80
},
{
"epoch": 0.13,
"learning_rate": 0.0004949155967670468,
"loss": 0.2242,
"step": 90
},
{
"epoch": 0.14,
"learning_rate": 0.0004937139256424639,
"loss": 0.2113,
"step": 100
},
{
"epoch": 0.16,
"learning_rate": 0.0004923866835440515,
"loss": 0.2212,
"step": 110
},
{
"epoch": 0.17,
"learning_rate": 0.0004909345543189974,
"loss": 0.212,
"step": 120
},
{
"epoch": 0.19,
"learning_rate": 0.0004893582861612366,
"loss": 0.2013,
"step": 130
},
{
"epoch": 0.2,
"learning_rate": 0.00048765869122595047,
"loss": 0.2228,
"step": 140
},
{
"epoch": 0.22,
"learning_rate": 0.00048583664521111415,
"loss": 0.1779,
"step": 150
},
{
"epoch": 0.23,
"learning_rate": 0.00048389308690630165,
"loss": 0.2137,
"step": 160
},
{
"epoch": 0.25,
"learning_rate": 0.00048182901770898496,
"loss": 0.1894,
"step": 170
},
{
"epoch": 0.26,
"learning_rate": 0.0004796455011085747,
"loss": 0.2175,
"step": 180
},
{
"epoch": 0.27,
"learning_rate": 0.00047734366213846903,
"loss": 0.1969,
"step": 190
},
{
"epoch": 0.29,
"learning_rate": 0.00047492468679639156,
"loss": 0.1976,
"step": 200
},
{
"epoch": 0.3,
"learning_rate": 0.00047238982143331946,
"loss": 0.2004,
"step": 210
},
{
"epoch": 0.32,
"learning_rate": 0.0004697403721113144,
"loss": 0.1829,
"step": 220
},
{
"epoch": 0.33,
"learning_rate": 0.000466977703930588,
"loss": 0.2145,
"step": 230
},
{
"epoch": 0.35,
"learning_rate": 0.0004641032403261489,
"loss": 0.2168,
"step": 240
},
{
"epoch": 0.36,
"learning_rate": 0.00046111846233439283,
"loss": 0.1924,
"step": 250
},
{
"epoch": 0.38,
"learning_rate": 0.00045802490783001485,
"loss": 0.2023,
"step": 260
},
{
"epoch": 0.39,
"learning_rate": 0.00045482417073363604,
"loss": 0.2061,
"step": 270
},
{
"epoch": 0.4,
"learning_rate": 0.0004515179001905528,
"loss": 0.2002,
"step": 280
},
{
"epoch": 0.42,
"learning_rate": 0.000448107799721033,
"loss": 0.194,
"step": 290
},
{
"epoch": 0.43,
"learning_rate": 0.00044459562634259475,
"loss": 0.1921,
"step": 300
},
{
"epoch": 0.45,
"learning_rate": 0.0004409831896647228,
"loss": 0.1975,
"step": 310
},
{
"epoch": 0.46,
"learning_rate": 0.00043727235095648647,
"loss": 0.1995,
"step": 320
},
{
"epoch": 0.48,
"learning_rate": 0.0004334650221875406,
"loss": 0.2173,
"step": 330
},
{
"epoch": 0.49,
"learning_rate": 0.00042956316504300416,
"loss": 0.2032,
"step": 340
},
{
"epoch": 0.51,
"learning_rate": 0.0004255687899127229,
"loss": 0.2031,
"step": 350
},
{
"epoch": 0.52,
"learning_rate": 0.00042148395485543767,
"loss": 0.2004,
"step": 360
},
{
"epoch": 0.53,
"learning_rate": 0.000417310764538392,
"loss": 0.1966,
"step": 370
},
{
"epoch": 0.55,
"learning_rate": 0.00041305136915292486,
"loss": 0.2355,
"step": 380
},
{
"epoch": 0.56,
"learning_rate": 0.0004087079633066076,
"loss": 0.1976,
"step": 390
},
{
"epoch": 0.58,
"learning_rate": 0.0004042827848924964,
"loss": 0.214,
"step": 400
},
{
"epoch": 0.59,
"learning_rate": 0.00039977811393608143,
"loss": 0.1978,
"step": 410
},
{
"epoch": 0.61,
"learning_rate": 0.0003951962714205291,
"loss": 0.1958,
"step": 420
},
{
"epoch": 0.62,
"learning_rate": 0.0003905396180908197,
"loss": 0.2032,
"step": 430
},
{
"epoch": 0.64,
"learning_rate": 0.00038581055323739946,
"loss": 0.1839,
"step": 440
},
{
"epoch": 0.65,
"learning_rate": 0.00038101151345997175,
"loss": 0.1954,
"step": 450
},
{
"epoch": 0.66,
"learning_rate": 0.0003761449714120656,
"loss": 0.1991,
"step": 460
},
{
"epoch": 0.68,
"learning_rate": 0.0003712134345270275,
"loss": 0.1984,
"step": 470
},
{
"epoch": 0.69,
"learning_rate": 0.0003662194437260931,
"loss": 0.1895,
"step": 480
},
{
"epoch": 0.71,
"learning_rate": 0.00036116557210920554,
"loss": 0.1876,
"step": 490
},
{
"epoch": 0.72,
"learning_rate": 0.00035605442362925284,
"loss": 0.1896,
"step": 500
},
{
"epoch": 0.74,
"learning_rate": 0.00035088863175040946,
"loss": 0.1932,
"step": 510
},
{
"epoch": 0.75,
"learning_rate": 0.0003456708580912725,
"loss": 0.2007,
"step": 520
},
{
"epoch": 0.77,
"learning_rate": 0.00034040379105349086,
"loss": 0.1815,
"step": 530
},
{
"epoch": 0.78,
"learning_rate": 0.0003350901444365959,
"loss": 0.1707,
"step": 540
},
{
"epoch": 0.79,
"learning_rate": 0.0003297326560397451,
"loss": 0.2185,
"step": 550
},
{
"epoch": 0.81,
"learning_rate": 0.0003243340862511003,
"loss": 0.2049,
"step": 560
},
{
"epoch": 0.82,
"learning_rate": 0.00031889721662556813,
"loss": 0.2334,
"step": 570
},
{
"epoch": 0.84,
"learning_rate": 0.0003134248484516332,
"loss": 0.2091,
"step": 580
},
{
"epoch": 0.85,
"learning_rate": 0.00030791980130802485,
"loss": 0.1785,
"step": 590
},
{
"epoch": 0.87,
"learning_rate": 0.00030238491161095913,
"loss": 0.1961,
"step": 600
},
{
"epoch": 0.88,
"learning_rate": 0.0002968230311527065,
"loss": 0.185,
"step": 610
},
{
"epoch": 0.9,
"learning_rate": 0.0002912370256322358,
"loss": 0.1823,
"step": 620
},
{
"epoch": 0.91,
"learning_rate": 0.00028562977317869454,
"loss": 0.1937,
"step": 630
},
{
"epoch": 0.92,
"learning_rate": 0.00028000416286848355,
"loss": 0.2217,
"step": 640
},
{
"epoch": 0.94,
"learning_rate": 0.0002743630932366912,
"loss": 0.204,
"step": 650
},
{
"epoch": 0.95,
"learning_rate": 0.0002687094707836551,
"loss": 0.1993,
"step": 660
},
{
"epoch": 0.97,
"learning_rate": 0.0002630462084774183,
"loss": 0.2013,
"step": 670
},
{
"epoch": 0.98,
"learning_rate": 0.00025737622425285454,
"loss": 0.1956,
"step": 680
},
{
"epoch": 1.0,
"learning_rate": 0.0002517024395082337,
"loss": 0.208,
"step": 690
},
{
"epoch": 1.01,
"learning_rate": 0.0002460277776000023,
"loss": 0.1845,
"step": 700
},
{
"epoch": 1.03,
"learning_rate": 0.00024035516233655632,
"loss": 0.1921,
"step": 710
},
{
"epoch": 1.04,
"learning_rate": 0.00023468751647177984,
"loss": 0.1736,
"step": 720
},
{
"epoch": 1.05,
"learning_rate": 0.0002290277601991279,
"loss": 0.1844,
"step": 730
},
{
"epoch": 1.07,
"learning_rate": 0.00022337880964702823,
"loss": 0.1933,
"step": 740
},
{
"epoch": 1.08,
"learning_rate": 0.00021774357537637746,
"loss": 0.1919,
"step": 750
},
{
"epoch": 1.1,
"learning_rate": 0.00021212496088090602,
"loss": 0.2008,
"step": 760
},
{
"epoch": 1.11,
"learning_rate": 0.00020652586109118432,
"loss": 0.1894,
"step": 770
},
{
"epoch": 1.13,
"learning_rate": 0.0002009491608830409,
"loss": 0.1817,
"step": 780
},
{
"epoch": 1.14,
"learning_rate": 0.0001953977335911613,
"loss": 0.1812,
"step": 790
},
{
"epoch": 1.16,
"learning_rate": 0.00018987443952863336,
"loss": 0.1731,
"step": 800
},
{
"epoch": 1.17,
"learning_rate": 0.00018438212451320137,
"loss": 0.1848,
"step": 810
},
{
"epoch": 1.18,
"learning_rate": 0.0001789236184009898,
"loss": 0.1894,
"step": 820
},
{
"epoch": 1.2,
"learning_rate": 0.00017350173362844999,
"loss": 0.1861,
"step": 830
},
{
"epoch": 1.21,
"learning_rate": 0.00016811926376328256,
"loss": 0.1775,
"step": 840
},
{
"epoch": 1.23,
"learning_rate": 0.00016277898206508199,
"loss": 0.1872,
"step": 850
},
{
"epoch": 1.24,
"learning_rate": 0.00015748364005644422,
"loss": 0.1867,
"step": 860
},
{
"epoch": 1.26,
"learning_rate": 0.00015223596610527455,
"loss": 0.1809,
"step": 870
},
{
"epoch": 1.27,
"learning_rate": 0.00014703866401902528,
"loss": 0.1743,
"step": 880
},
{
"epoch": 1.29,
"learning_rate": 0.00014189441165158822,
"loss": 0.1978,
"step": 890
},
{
"epoch": 1.3,
"learning_rate": 0.0001368058595235591,
"loss": 0.1825,
"step": 900
},
{
"epoch": 1.32,
"learning_rate": 0.00013177562945658578,
"loss": 0.1857,
"step": 910
},
{
"epoch": 1.33,
"learning_rate": 0.00012680631322250236,
"loss": 0.1898,
"step": 920
},
{
"epoch": 1.34,
"learning_rate": 0.00012190047120794725,
"loss": 0.1915,
"step": 930
},
{
"epoch": 1.36,
"learning_rate": 0.00011706063109515111,
"loss": 0.1728,
"step": 940
},
{
"epoch": 1.37,
"learning_rate": 0.00011228928655957607,
"loss": 0.1759,
"step": 950
},
{
"epoch": 1.39,
"learning_rate": 0.00010758889598507615,
"loss": 0.1817,
"step": 960
},
{
"epoch": 1.4,
"learning_rate": 0.00010296188119724162,
"loss": 0.1835,
"step": 970
},
{
"epoch": 1.42,
"learning_rate": 9.841062621557937e-05,
"loss": 0.2016,
"step": 980
},
{
"epoch": 1.43,
"learning_rate": 9.393747602517259e-05,
"loss": 0.2042,
"step": 990
},
{
"epoch": 1.45,
"learning_rate": 8.954473536845239e-05,
"loss": 0.1779,
"step": 1000
},
{
"epoch": 1.46,
"learning_rate": 8.523466755770443e-05,
"loss": 0.1868,
"step": 1010
},
{
"epoch": 1.47,
"learning_rate": 8.100949330892093e-05,
"loss": 0.1878,
"step": 1020
},
{
"epoch": 1.49,
"learning_rate": 7.687138959760159e-05,
"loss": 0.1826,
"step": 1030
},
{
"epoch": 1.5,
"learning_rate": 7.282248853708981e-05,
"loss": 0.1798,
"step": 1040
},
{
"epoch": 1.52,
"learning_rate": 6.886487628002441e-05,
"loss": 0.1655,
"step": 1050
},
{
"epoch": 1.53,
"learning_rate": 6.500059194347213e-05,
"loss": 0.1846,
"step": 1060
},
{
"epoch": 1.55,
"learning_rate": 6.123162655829426e-05,
"loss": 0.1731,
"step": 1070
},
{
"epoch": 1.56,
"learning_rate": 5.755992204328969e-05,
"loss": 0.1691,
"step": 1080
},
{
"epoch": 1.58,
"learning_rate": 5.3987370204642003e-05,
"loss": 0.1933,
"step": 1090
},
{
"epoch": 1.59,
"learning_rate": 5.051581176118689e-05,
"loss": 0.1969,
"step": 1100
},
{
"epoch": 1.6,
"learning_rate": 4.7147035396001405e-05,
"loss": 0.1949,
"step": 1110
},
{
"epoch": 1.62,
"learning_rate": 4.388277683480446e-05,
"loss": 0.1897,
"step": 1120
},
{
"epoch": 1.63,
"learning_rate": 4.072471795164279e-05,
"loss": 0.1776,
"step": 1130
},
{
"epoch": 1.65,
"learning_rate": 3.767448590232342e-05,
"loss": 0.1833,
"step": 1140
},
{
"epoch": 1.66,
"learning_rate": 3.473365228603928e-05,
"loss": 0.1782,
"step": 1150
},
{
"epoch": 1.68,
"learning_rate": 3.190373233561955e-05,
"loss": 0.1888,
"step": 1160
},
{
"epoch": 1.69,
"learning_rate": 2.9186184136822392e-05,
"loss": 0.1747,
"step": 1170
},
{
"epoch": 1.71,
"learning_rate": 2.6582407877071836e-05,
"loss": 0.2005,
"step": 1180
},
{
"epoch": 1.72,
"learning_rate": 2.4093745124026402e-05,
"loss": 0.1848,
"step": 1190
},
{
"epoch": 1.73,
"learning_rate": 2.1721478134350798e-05,
"loss": 0.1769,
"step": 1200
},
{
"epoch": 1.75,
"learning_rate": 1.946682919304693e-05,
"loss": 0.186,
"step": 1210
},
{
"epoch": 1.76,
"learning_rate": 1.7330959983684863e-05,
"loss": 0.1715,
"step": 1220
},
{
"epoch": 1.78,
"learning_rate": 1.5314970989857735e-05,
"loss": 0.1694,
"step": 1230
},
{
"epoch": 1.79,
"learning_rate": 1.3419900928169498e-05,
"loss": 0.1777,
"step": 1240
},
{
"epoch": 1.81,
"learning_rate": 1.1646726213047437e-05,
"loss": 0.1734,
"step": 1250
},
{
"epoch": 1.82,
"learning_rate": 9.996360453655068e-06,
"loss": 0.1989,
"step": 1260
},
{
"epoch": 1.84,
"learning_rate": 8.469653983164933e-06,
"loss": 0.1897,
"step": 1270
},
{
"epoch": 1.85,
"learning_rate": 7.067393420633589e-06,
"loss": 0.1898,
"step": 1280
},
{
"epoch": 1.86,
"learning_rate": 5.790301265704539e-06,
"loss": 0.1856,
"step": 1290
},
{
"epoch": 1.88,
"learning_rate": 4.639035526348145e-06,
"loss": 0.1787,
"step": 1300
},
{
"epoch": 1.89,
"learning_rate": 3.6141893798301293e-06,
"loss": 0.1827,
"step": 1310
},
{
"epoch": 1.91,
"learning_rate": 2.7162908670833596e-06,
"loss": 0.1816,
"step": 1320
},
{
"epoch": 1.92,
"learning_rate": 1.9458026206404245e-06,
"loss": 0.1751,
"step": 1330
},
{
"epoch": 1.94,
"learning_rate": 1.3031216262671675e-06,
"loss": 0.1862,
"step": 1340
},
{
"epoch": 1.95,
"learning_rate": 7.885790184201935e-07,
"loss": 0.1938,
"step": 1350
},
{
"epoch": 1.97,
"learning_rate": 4.024399096332898e-07,
"loss": 0.1696,
"step": 1360
},
{
"epoch": 1.98,
"learning_rate": 1.4490325392102488e-07,
"loss": 0.1761,
"step": 1370
},
{
"epoch": 1.99,
"learning_rate": 1.6101744269997332e-08,
"loss": 0.1897,
"step": 1380
},
{
"epoch": 2.0,
"step": 1384,
"total_flos": 3.2554020600775967e+18,
"train_loss": 0.19606802009605948,
"train_runtime": 24012.3724,
"train_samples_per_second": 3.688,
"train_steps_per_second": 0.058
}
],
"logging_steps": 10,
"max_steps": 1384,
"num_train_epochs": 2,
"save_steps": 150,
"total_flos": 3.2554020600775967e+18,
"trial_name": null,
"trial_params": null
}