llm_test_bpe / trainer_state.json
RefalMachine's picture
load model
8b5e245
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 119547,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00024811801548585953,
"loss": 8.8316,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 0.00029403430324938403,
"loss": 5.464,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.0002998014193905167,
"loss": 4.4569,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 0.0002995500515304113,
"loss": 4.0926,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.0002992986836703059,
"loss": 3.9091,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 0.00029904731581020046,
"loss": 3.7998,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 0.0002987959479500951,
"loss": 3.7103,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 0.0002985445800899897,
"loss": 3.663,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 0.00029829321222988426,
"loss": 3.6162,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 0.0002980418443697789,
"loss": 3.5725,
"step": 1000
},
{
"epoch": 0.01,
"eval_accuracy": 0.37403400092106887,
"eval_loss": 3.5909957885742188,
"eval_runtime": 37.0746,
"eval_samples_per_second": 302.526,
"eval_steps_per_second": 2.535,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.00029779047650967344,
"loss": 3.5506,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 0.00029753910864956806,
"loss": 3.5285,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.0002972877407894626,
"loss": 3.5064,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 0.00029703637292935724,
"loss": 3.4907,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.0002967875187478529,
"loss": 3.4708,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 0.00029653615088774747,
"loss": 3.456,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 0.0002962847830276421,
"loss": 3.4413,
"step": 1700
},
{
"epoch": 0.02,
"learning_rate": 0.00029603341516753665,
"loss": 3.4224,
"step": 1800
},
{
"epoch": 0.02,
"learning_rate": 0.00029578204730743127,
"loss": 3.4184,
"step": 1900
},
{
"epoch": 0.02,
"learning_rate": 0.00029553067944732583,
"loss": 3.4011,
"step": 2000
},
{
"epoch": 0.02,
"eval_accuracy": 0.39113344827973534,
"eval_loss": 3.4203457832336426,
"eval_runtime": 36.9935,
"eval_samples_per_second": 303.189,
"eval_steps_per_second": 2.541,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 0.00029527931158722045,
"loss": 3.3915,
"step": 2100
},
{
"epoch": 0.02,
"learning_rate": 0.000295027943727115,
"loss": 3.3831,
"step": 2200
},
{
"epoch": 0.02,
"learning_rate": 0.00029477657586700963,
"loss": 3.3814,
"step": 2300
},
{
"epoch": 0.02,
"learning_rate": 0.0002945252080069042,
"loss": 3.3734,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 0.00029427384014679876,
"loss": 3.3624,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 0.0002940224722866934,
"loss": 3.3559,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 0.000293771104426588,
"loss": 3.3459,
"step": 2700
},
{
"epoch": 0.02,
"learning_rate": 0.00029351973656648256,
"loss": 3.3462,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 0.0002932683687063772,
"loss": 3.3306,
"step": 2900
},
{
"epoch": 0.03,
"learning_rate": 0.0002930170008462718,
"loss": 3.335,
"step": 3000
},
{
"epoch": 0.03,
"eval_accuracy": 0.39839248205600547,
"eval_loss": 3.3489201068878174,
"eval_runtime": 36.3401,
"eval_samples_per_second": 308.64,
"eval_steps_per_second": 2.587,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 0.00029276563298616636,
"loss": 3.3239,
"step": 3100
},
{
"epoch": 0.03,
"learning_rate": 0.000292514265126061,
"loss": 3.3132,
"step": 3200
},
{
"epoch": 0.03,
"learning_rate": 0.00029226289726595555,
"loss": 3.3157,
"step": 3300
},
{
"epoch": 0.03,
"learning_rate": 0.00029201152940585017,
"loss": 3.3077,
"step": 3400
},
{
"epoch": 0.03,
"learning_rate": 0.00029176016154574473,
"loss": 3.308,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 0.00029150879368563935,
"loss": 3.2971,
"step": 3600
},
{
"epoch": 0.03,
"learning_rate": 0.0002912574258255339,
"loss": 3.2953,
"step": 3700
},
{
"epoch": 0.03,
"learning_rate": 0.00029100605796542853,
"loss": 3.2915,
"step": 3800
},
{
"epoch": 0.03,
"learning_rate": 0.0002907572037839242,
"loss": 3.289,
"step": 3900
},
{
"epoch": 0.03,
"learning_rate": 0.00029050583592381876,
"loss": 3.2835,
"step": 4000
},
{
"epoch": 0.03,
"eval_accuracy": 0.4028412728722747,
"eval_loss": 3.306710958480835,
"eval_runtime": 37.7903,
"eval_samples_per_second": 296.796,
"eval_steps_per_second": 2.487,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 0.0002902544680637134,
"loss": 3.2759,
"step": 4100
},
{
"epoch": 0.04,
"learning_rate": 0.00029000310020360794,
"loss": 3.2803,
"step": 4200
},
{
"epoch": 0.04,
"learning_rate": 0.0002897517323435025,
"loss": 3.2752,
"step": 4300
},
{
"epoch": 0.04,
"learning_rate": 0.0002895003644833971,
"loss": 3.28,
"step": 4400
},
{
"epoch": 0.04,
"learning_rate": 0.0002892489966232917,
"loss": 3.2788,
"step": 4500
},
{
"epoch": 0.04,
"learning_rate": 0.0002889976287631863,
"loss": 3.2663,
"step": 4600
},
{
"epoch": 0.04,
"learning_rate": 0.0002887462609030809,
"loss": 3.2647,
"step": 4700
},
{
"epoch": 0.04,
"learning_rate": 0.0002884948930429755,
"loss": 3.2643,
"step": 4800
},
{
"epoch": 0.04,
"learning_rate": 0.0002882435251828701,
"loss": 3.2686,
"step": 4900
},
{
"epoch": 0.04,
"learning_rate": 0.00028799215732276467,
"loss": 3.2477,
"step": 5000
},
{
"epoch": 0.04,
"eval_accuracy": 0.4059681332629427,
"eval_loss": 3.2766220569610596,
"eval_runtime": 36.7229,
"eval_samples_per_second": 305.423,
"eval_steps_per_second": 2.56,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 0.0002877407894626593,
"loss": 3.247,
"step": 5100
},
{
"epoch": 0.04,
"learning_rate": 0.0002874894216025539,
"loss": 3.2555,
"step": 5200
},
{
"epoch": 0.04,
"learning_rate": 0.00028723805374244847,
"loss": 3.2528,
"step": 5300
},
{
"epoch": 0.05,
"learning_rate": 0.0002869866858823431,
"loss": 3.2441,
"step": 5400
},
{
"epoch": 0.05,
"learning_rate": 0.00028673531802223765,
"loss": 3.2458,
"step": 5500
},
{
"epoch": 0.05,
"learning_rate": 0.00028648395016213227,
"loss": 3.2394,
"step": 5600
},
{
"epoch": 0.05,
"learning_rate": 0.00028623258230202683,
"loss": 3.2464,
"step": 5700
},
{
"epoch": 0.05,
"learning_rate": 0.00028598121444192145,
"loss": 3.2484,
"step": 5800
},
{
"epoch": 0.05,
"learning_rate": 0.000285729846581816,
"loss": 3.2372,
"step": 5900
},
{
"epoch": 0.05,
"learning_rate": 0.0002854784787217106,
"loss": 3.2373,
"step": 6000
},
{
"epoch": 0.05,
"eval_accuracy": 0.40810372134296335,
"eval_loss": 3.256094455718994,
"eval_runtime": 36.3187,
"eval_samples_per_second": 308.822,
"eval_steps_per_second": 2.588,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 0.0002852271108616052,
"loss": 3.2314,
"step": 6100
},
{
"epoch": 0.05,
"learning_rate": 0.00028497825668010086,
"loss": 3.2393,
"step": 6200
},
{
"epoch": 0.05,
"learning_rate": 0.0002847268888199954,
"loss": 3.23,
"step": 6300
},
{
"epoch": 0.05,
"learning_rate": 0.0002844780346384911,
"loss": 3.2317,
"step": 6400
},
{
"epoch": 0.05,
"learning_rate": 0.0002842266667783857,
"loss": 3.2188,
"step": 6500
},
{
"epoch": 0.06,
"learning_rate": 0.00028397529891828027,
"loss": 3.2251,
"step": 6600
},
{
"epoch": 0.06,
"learning_rate": 0.0002837239310581749,
"loss": 3.2235,
"step": 6700
},
{
"epoch": 0.06,
"learning_rate": 0.00028347256319806945,
"loss": 3.2174,
"step": 6800
},
{
"epoch": 0.06,
"learning_rate": 0.00028322119533796407,
"loss": 3.2212,
"step": 6900
},
{
"epoch": 0.06,
"learning_rate": 0.00028296982747785863,
"loss": 3.2208,
"step": 7000
},
{
"epoch": 0.06,
"eval_accuracy": 0.4099135655475305,
"eval_loss": 3.2382774353027344,
"eval_runtime": 36.4193,
"eval_samples_per_second": 307.969,
"eval_steps_per_second": 2.581,
"step": 7000
},
{
"epoch": 0.06,
"learning_rate": 0.00028271845961775325,
"loss": 3.215,
"step": 7100
},
{
"epoch": 0.06,
"learning_rate": 0.0002824670917576478,
"loss": 3.2124,
"step": 7200
},
{
"epoch": 0.06,
"learning_rate": 0.00028221572389754244,
"loss": 3.2214,
"step": 7300
},
{
"epoch": 0.06,
"learning_rate": 0.000281964356037437,
"loss": 3.2157,
"step": 7400
},
{
"epoch": 0.06,
"learning_rate": 0.0002817129881773316,
"loss": 3.212,
"step": 7500
},
{
"epoch": 0.06,
"learning_rate": 0.00028146162031722624,
"loss": 3.2063,
"step": 7600
},
{
"epoch": 0.06,
"learning_rate": 0.0002812102524571208,
"loss": 3.2089,
"step": 7700
},
{
"epoch": 0.07,
"learning_rate": 0.0002809588845970154,
"loss": 3.2056,
"step": 7800
},
{
"epoch": 0.07,
"learning_rate": 0.00028070751673691,
"loss": 3.206,
"step": 7900
},
{
"epoch": 0.07,
"learning_rate": 0.0002804561488768046,
"loss": 3.2021,
"step": 8000
},
{
"epoch": 0.07,
"eval_accuracy": 0.4112453244521325,
"eval_loss": 3.2249624729156494,
"eval_runtime": 37.3966,
"eval_samples_per_second": 299.92,
"eval_steps_per_second": 2.514,
"step": 8000
},
{
"epoch": 0.07,
"learning_rate": 0.00028020478101669917,
"loss": 3.2098,
"step": 8100
},
{
"epoch": 0.07,
"learning_rate": 0.0002799534131565938,
"loss": 3.2099,
"step": 8200
},
{
"epoch": 0.07,
"learning_rate": 0.00027970204529648835,
"loss": 3.2075,
"step": 8300
},
{
"epoch": 0.07,
"learning_rate": 0.00027945067743638297,
"loss": 3.205,
"step": 8400
},
{
"epoch": 0.07,
"learning_rate": 0.00027920182325487863,
"loss": 3.1931,
"step": 8500
},
{
"epoch": 0.07,
"learning_rate": 0.0002789504553947732,
"loss": 3.1969,
"step": 8600
},
{
"epoch": 0.07,
"learning_rate": 0.0002786990875346678,
"loss": 3.1974,
"step": 8700
},
{
"epoch": 0.07,
"learning_rate": 0.0002784502333531634,
"loss": 3.1958,
"step": 8800
},
{
"epoch": 0.07,
"learning_rate": 0.00027819886549305804,
"loss": 3.1925,
"step": 8900
},
{
"epoch": 0.08,
"learning_rate": 0.00027794749763295265,
"loss": 3.194,
"step": 9000
},
{
"epoch": 0.08,
"eval_accuracy": 0.41224642524178057,
"eval_loss": 3.2142982482910156,
"eval_runtime": 37.0575,
"eval_samples_per_second": 302.665,
"eval_steps_per_second": 2.537,
"step": 9000
},
{
"epoch": 0.08,
"learning_rate": 0.0002776961297728472,
"loss": 3.1941,
"step": 9100
},
{
"epoch": 0.08,
"learning_rate": 0.00027744476191274184,
"loss": 3.1943,
"step": 9200
},
{
"epoch": 0.08,
"learning_rate": 0.0002771933940526364,
"loss": 3.197,
"step": 9300
},
{
"epoch": 0.08,
"learning_rate": 0.000276942026192531,
"loss": 3.1912,
"step": 9400
},
{
"epoch": 0.08,
"learning_rate": 0.0002766906583324256,
"loss": 3.1941,
"step": 9500
},
{
"epoch": 0.08,
"learning_rate": 0.0002764392904723202,
"loss": 3.1904,
"step": 9600
},
{
"epoch": 0.08,
"learning_rate": 0.00027618792261221477,
"loss": 3.1807,
"step": 9700
},
{
"epoch": 0.08,
"learning_rate": 0.00027593906843071043,
"loss": 3.1854,
"step": 9800
},
{
"epoch": 0.08,
"learning_rate": 0.00027568770057060505,
"loss": 3.1859,
"step": 9900
},
{
"epoch": 0.08,
"learning_rate": 0.0002754363327104996,
"loss": 3.1971,
"step": 10000
},
{
"epoch": 0.08,
"eval_accuracy": 0.413248228065643,
"eval_loss": 3.2038817405700684,
"eval_runtime": 36.9865,
"eval_samples_per_second": 303.246,
"eval_steps_per_second": 2.541,
"step": 10000
},
{
"epoch": 0.08,
"learning_rate": 0.00027518496485039423,
"loss": 3.1776,
"step": 10100
},
{
"epoch": 0.09,
"learning_rate": 0.0002749335969902888,
"loss": 3.1872,
"step": 10200
},
{
"epoch": 0.09,
"learning_rate": 0.0002746822291301834,
"loss": 3.1792,
"step": 10300
},
{
"epoch": 0.09,
"learning_rate": 0.000274430861270078,
"loss": 3.1858,
"step": 10400
},
{
"epoch": 0.09,
"learning_rate": 0.00027417949340997254,
"loss": 3.1825,
"step": 10500
},
{
"epoch": 0.09,
"learning_rate": 0.00027392812554986716,
"loss": 3.1798,
"step": 10600
},
{
"epoch": 0.09,
"learning_rate": 0.0002736767576897618,
"loss": 3.1819,
"step": 10700
},
{
"epoch": 0.09,
"learning_rate": 0.00027342538982965634,
"loss": 3.1778,
"step": 10800
},
{
"epoch": 0.09,
"learning_rate": 0.00027317402196955096,
"loss": 3.185,
"step": 10900
},
{
"epoch": 0.09,
"learning_rate": 0.0002729226541094455,
"loss": 3.1794,
"step": 11000
},
{
"epoch": 0.09,
"eval_accuracy": 0.41429074887393713,
"eval_loss": 3.1947903633117676,
"eval_runtime": 37.2827,
"eval_samples_per_second": 300.837,
"eval_steps_per_second": 2.521,
"step": 11000
},
{
"epoch": 0.09,
"learning_rate": 0.00027267128624934014,
"loss": 3.1782,
"step": 11100
},
{
"epoch": 0.09,
"learning_rate": 0.00027242243206783575,
"loss": 3.1752,
"step": 11200
},
{
"epoch": 0.09,
"learning_rate": 0.00027217106420773037,
"loss": 3.172,
"step": 11300
},
{
"epoch": 0.1,
"learning_rate": 0.000271919696347625,
"loss": 3.1794,
"step": 11400
},
{
"epoch": 0.1,
"learning_rate": 0.00027166832848751955,
"loss": 3.1773,
"step": 11500
},
{
"epoch": 0.1,
"learning_rate": 0.00027141696062741417,
"loss": 3.1776,
"step": 11600
},
{
"epoch": 0.1,
"learning_rate": 0.0002711681064459098,
"loss": 3.1866,
"step": 11700
},
{
"epoch": 0.1,
"learning_rate": 0.0002709167385858044,
"loss": 3.1707,
"step": 11800
},
{
"epoch": 0.1,
"learning_rate": 0.00027066537072569896,
"loss": 3.1705,
"step": 11900
},
{
"epoch": 0.1,
"learning_rate": 0.0002704140028655936,
"loss": 3.1731,
"step": 12000
},
{
"epoch": 0.1,
"eval_accuracy": 0.41493381221427206,
"eval_loss": 3.1884472370147705,
"eval_runtime": 36.6321,
"eval_samples_per_second": 306.18,
"eval_steps_per_second": 2.566,
"step": 12000
},
{
"epoch": 0.1,
"learning_rate": 0.00027016514868408924,
"loss": 3.1688,
"step": 12100
},
{
"epoch": 0.1,
"learning_rate": 0.0002699137808239838,
"loss": 3.1698,
"step": 12200
},
{
"epoch": 0.1,
"learning_rate": 0.0002696624129638784,
"loss": 3.1661,
"step": 12300
},
{
"epoch": 0.1,
"learning_rate": 0.000269411045103773,
"loss": 3.163,
"step": 12400
},
{
"epoch": 0.1,
"learning_rate": 0.0002691596772436676,
"loss": 3.166,
"step": 12500
},
{
"epoch": 0.11,
"learning_rate": 0.00026890830938356217,
"loss": 3.1684,
"step": 12600
},
{
"epoch": 0.11,
"learning_rate": 0.0002686569415234568,
"loss": 3.1665,
"step": 12700
},
{
"epoch": 0.11,
"learning_rate": 0.0002684055736633514,
"loss": 3.1623,
"step": 12800
},
{
"epoch": 0.11,
"learning_rate": 0.00026815420580324597,
"loss": 3.1674,
"step": 12900
},
{
"epoch": 0.11,
"learning_rate": 0.0002679028379431406,
"loss": 3.1596,
"step": 13000
},
{
"epoch": 0.11,
"eval_accuracy": 0.41567656441304324,
"eval_loss": 3.181196928024292,
"eval_runtime": 38.8685,
"eval_samples_per_second": 288.563,
"eval_steps_per_second": 2.418,
"step": 13000
},
{
"epoch": 0.11,
"learning_rate": 0.00026765147008303515,
"loss": 3.1659,
"step": 13100
},
{
"epoch": 0.11,
"learning_rate": 0.00026740010222292977,
"loss": 3.1528,
"step": 13200
},
{
"epoch": 0.11,
"learning_rate": 0.00026714873436282433,
"loss": 3.1656,
"step": 13300
},
{
"epoch": 0.11,
"learning_rate": 0.00026689736650271895,
"loss": 3.1594,
"step": 13400
},
{
"epoch": 0.11,
"learning_rate": 0.0002666459986426135,
"loss": 3.1593,
"step": 13500
},
{
"epoch": 0.11,
"learning_rate": 0.00026639463078250813,
"loss": 3.1579,
"step": 13600
},
{
"epoch": 0.11,
"learning_rate": 0.0002661432629224027,
"loss": 3.1599,
"step": 13700
},
{
"epoch": 0.12,
"learning_rate": 0.0002658918950622973,
"loss": 3.1529,
"step": 13800
},
{
"epoch": 0.12,
"learning_rate": 0.0002656405272021919,
"loss": 3.1615,
"step": 13900
},
{
"epoch": 0.12,
"learning_rate": 0.0002653891593420865,
"loss": 3.1628,
"step": 14000
},
{
"epoch": 0.12,
"eval_accuracy": 0.41594754961977826,
"eval_loss": 3.1771674156188965,
"eval_runtime": 37.2285,
"eval_samples_per_second": 301.275,
"eval_steps_per_second": 2.525,
"step": 14000
},
{
"epoch": 0.12,
"learning_rate": 0.00026513779148198106,
"loss": 3.1594,
"step": 14100
},
{
"epoch": 0.12,
"learning_rate": 0.0002648864236218757,
"loss": 3.158,
"step": 14200
},
{
"epoch": 0.12,
"learning_rate": 0.0002646400831189724,
"loss": 3.1588,
"step": 14300
},
{
"epoch": 0.12,
"learning_rate": 0.000264388715258867,
"loss": 3.1606,
"step": 14400
},
{
"epoch": 0.12,
"learning_rate": 0.00026413734739876157,
"loss": 3.1555,
"step": 14500
},
{
"epoch": 0.12,
"learning_rate": 0.0002638859795386562,
"loss": 3.1574,
"step": 14600
},
{
"epoch": 0.12,
"learning_rate": 0.00026363461167855075,
"loss": 3.1526,
"step": 14700
},
{
"epoch": 0.12,
"learning_rate": 0.00026338324381844537,
"loss": 3.1457,
"step": 14800
},
{
"epoch": 0.12,
"learning_rate": 0.00026313187595833993,
"loss": 3.1655,
"step": 14900
},
{
"epoch": 0.13,
"learning_rate": 0.0002628805080982345,
"loss": 3.1658,
"step": 15000
},
{
"epoch": 0.13,
"eval_accuracy": 0.416945842272569,
"eval_loss": 3.170196294784546,
"eval_runtime": 38.2091,
"eval_samples_per_second": 293.542,
"eval_steps_per_second": 2.46,
"step": 15000
},
{
"epoch": 0.13,
"learning_rate": 0.0002626291402381291,
"loss": 3.1537,
"step": 15100
},
{
"epoch": 0.13,
"learning_rate": 0.00026237777237802373,
"loss": 3.1596,
"step": 15200
},
{
"epoch": 0.13,
"learning_rate": 0.00026212891819651934,
"loss": 3.1558,
"step": 15300
},
{
"epoch": 0.13,
"learning_rate": 0.00026187755033641396,
"loss": 3.1568,
"step": 15400
},
{
"epoch": 0.13,
"learning_rate": 0.0002616261824763085,
"loss": 3.1488,
"step": 15500
},
{
"epoch": 0.13,
"learning_rate": 0.00026137481461620314,
"loss": 3.1452,
"step": 15600
},
{
"epoch": 0.13,
"learning_rate": 0.0002611234467560977,
"loss": 3.1503,
"step": 15700
},
{
"epoch": 0.13,
"learning_rate": 0.0002608720788959923,
"loss": 3.1456,
"step": 15800
},
{
"epoch": 0.13,
"learning_rate": 0.00026062071103588694,
"loss": 3.1469,
"step": 15900
},
{
"epoch": 0.13,
"learning_rate": 0.0002603693431757815,
"loss": 3.1479,
"step": 16000
},
{
"epoch": 0.13,
"eval_accuracy": 0.41732213261145495,
"eval_loss": 3.1664865016937256,
"eval_runtime": 36.9736,
"eval_samples_per_second": 303.351,
"eval_steps_per_second": 2.542,
"step": 16000
},
{
"epoch": 0.13,
"learning_rate": 0.0002601179753156761,
"loss": 3.152,
"step": 16100
},
{
"epoch": 0.14,
"learning_rate": 0.00025986660745557074,
"loss": 3.1515,
"step": 16200
},
{
"epoch": 0.14,
"learning_rate": 0.0002596152395954653,
"loss": 3.1403,
"step": 16300
},
{
"epoch": 0.14,
"learning_rate": 0.0002593638717353599,
"loss": 3.1482,
"step": 16400
},
{
"epoch": 0.14,
"learning_rate": 0.0002591125038752545,
"loss": 3.1384,
"step": 16500
},
{
"epoch": 0.14,
"learning_rate": 0.0002588611360151491,
"loss": 3.1423,
"step": 16600
},
{
"epoch": 0.14,
"learning_rate": 0.0002586097681550437,
"loss": 3.1388,
"step": 16700
},
{
"epoch": 0.14,
"learning_rate": 0.00025835840029493824,
"loss": 3.1502,
"step": 16800
},
{
"epoch": 0.14,
"learning_rate": 0.00025810703243483286,
"loss": 3.1423,
"step": 16900
},
{
"epoch": 0.14,
"learning_rate": 0.0002578556645747274,
"loss": 3.1401,
"step": 17000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4181765082503061,
"eval_loss": 3.161729097366333,
"eval_runtime": 36.3895,
"eval_samples_per_second": 308.221,
"eval_steps_per_second": 2.583,
"step": 17000
},
{
"epoch": 0.14,
"learning_rate": 0.00025760429671462204,
"loss": 3.1444,
"step": 17100
},
{
"epoch": 0.14,
"learning_rate": 0.0002573529288545166,
"loss": 3.1344,
"step": 17200
},
{
"epoch": 0.14,
"learning_rate": 0.0002571015609944112,
"loss": 3.1362,
"step": 17300
},
{
"epoch": 0.15,
"learning_rate": 0.00025685019313430584,
"loss": 3.1449,
"step": 17400
},
{
"epoch": 0.15,
"learning_rate": 0.0002565988252742004,
"loss": 3.1403,
"step": 17500
},
{
"epoch": 0.15,
"learning_rate": 0.000256347457414095,
"loss": 3.1485,
"step": 17600
},
{
"epoch": 0.15,
"learning_rate": 0.00025609608955398964,
"loss": 3.1465,
"step": 17700
},
{
"epoch": 0.15,
"learning_rate": 0.0002558447216938842,
"loss": 3.1388,
"step": 17800
},
{
"epoch": 0.15,
"learning_rate": 0.0002555933538337788,
"loss": 3.1412,
"step": 17900
},
{
"epoch": 0.15,
"learning_rate": 0.0002553419859736734,
"loss": 3.1386,
"step": 18000
},
{
"epoch": 0.15,
"eval_accuracy": 0.4183225313668887,
"eval_loss": 3.1586148738861084,
"eval_runtime": 36.9298,
"eval_samples_per_second": 303.711,
"eval_steps_per_second": 2.545,
"step": 18000
},
{
"epoch": 0.15,
"learning_rate": 0.000255090618113568,
"loss": 3.1421,
"step": 18100
},
{
"epoch": 0.15,
"learning_rate": 0.00025483925025346257,
"loss": 3.1355,
"step": 18200
},
{
"epoch": 0.15,
"learning_rate": 0.0002545878823933572,
"loss": 3.1399,
"step": 18300
},
{
"epoch": 0.15,
"learning_rate": 0.00025433651453325175,
"loss": 3.1349,
"step": 18400
},
{
"epoch": 0.15,
"learning_rate": 0.0002540851466731463,
"loss": 3.1413,
"step": 18500
},
{
"epoch": 0.16,
"learning_rate": 0.00025383377881304093,
"loss": 3.1278,
"step": 18600
},
{
"epoch": 0.16,
"learning_rate": 0.0002535824109529355,
"loss": 3.1405,
"step": 18700
},
{
"epoch": 0.16,
"learning_rate": 0.0002533310430928301,
"loss": 3.1277,
"step": 18800
},
{
"epoch": 0.16,
"learning_rate": 0.00025307967523272474,
"loss": 3.1341,
"step": 18900
},
{
"epoch": 0.16,
"learning_rate": 0.0002528283073726193,
"loss": 3.1396,
"step": 19000
},
{
"epoch": 0.16,
"eval_accuracy": 0.41871075628741844,
"eval_loss": 3.1532347202301025,
"eval_runtime": 36.5045,
"eval_samples_per_second": 307.25,
"eval_steps_per_second": 2.575,
"step": 19000
},
{
"epoch": 0.16,
"learning_rate": 0.00025257945319111496,
"loss": 3.1329,
"step": 19100
},
{
"epoch": 0.16,
"learning_rate": 0.0002523280853310095,
"loss": 3.1358,
"step": 19200
},
{
"epoch": 0.16,
"learning_rate": 0.00025207671747090414,
"loss": 3.1377,
"step": 19300
},
{
"epoch": 0.16,
"learning_rate": 0.0002518253496107987,
"loss": 3.1289,
"step": 19400
},
{
"epoch": 0.16,
"learning_rate": 0.0002515739817506933,
"loss": 3.1348,
"step": 19500
},
{
"epoch": 0.16,
"learning_rate": 0.00025132261389058794,
"loss": 3.1324,
"step": 19600
},
{
"epoch": 0.16,
"learning_rate": 0.0002510712460304825,
"loss": 3.136,
"step": 19700
},
{
"epoch": 0.17,
"learning_rate": 0.0002508198781703771,
"loss": 3.1337,
"step": 19800
},
{
"epoch": 0.17,
"learning_rate": 0.00025056851031027175,
"loss": 3.132,
"step": 19900
},
{
"epoch": 0.17,
"learning_rate": 0.0002503171424501663,
"loss": 3.1345,
"step": 20000
},
{
"epoch": 0.17,
"eval_accuracy": 0.41896770080986667,
"eval_loss": 3.150233268737793,
"eval_runtime": 36.0939,
"eval_samples_per_second": 310.745,
"eval_steps_per_second": 2.604,
"step": 20000
},
{
"epoch": 0.17,
"learning_rate": 0.0002500682882686619,
"loss": 3.1301,
"step": 20100
},
{
"epoch": 0.17,
"learning_rate": 0.00024981692040855653,
"loss": 3.1261,
"step": 20200
},
{
"epoch": 0.17,
"learning_rate": 0.00024956555254845115,
"loss": 3.1279,
"step": 20300
},
{
"epoch": 0.17,
"learning_rate": 0.0002493141846883457,
"loss": 3.1235,
"step": 20400
},
{
"epoch": 0.17,
"learning_rate": 0.00024906281682824034,
"loss": 3.1302,
"step": 20500
},
{
"epoch": 0.17,
"learning_rate": 0.0002488114489681349,
"loss": 3.1287,
"step": 20600
},
{
"epoch": 0.17,
"learning_rate": 0.0002485600811080295,
"loss": 3.1314,
"step": 20700
},
{
"epoch": 0.17,
"learning_rate": 0.0002483087132479241,
"loss": 3.1226,
"step": 20800
},
{
"epoch": 0.17,
"learning_rate": 0.0002480573453878187,
"loss": 3.1289,
"step": 20900
},
{
"epoch": 0.18,
"learning_rate": 0.00024780597752771327,
"loss": 3.1319,
"step": 21000
},
{
"epoch": 0.18,
"eval_accuracy": 0.41907721814730364,
"eval_loss": 3.1475839614868164,
"eval_runtime": 36.3645,
"eval_samples_per_second": 308.432,
"eval_steps_per_second": 2.585,
"step": 21000
},
{
"epoch": 0.18,
"learning_rate": 0.0002475546096676079,
"loss": 3.1304,
"step": 21100
},
{
"epoch": 0.18,
"learning_rate": 0.00024730324180750245,
"loss": 3.1309,
"step": 21200
},
{
"epoch": 0.18,
"learning_rate": 0.00024705187394739707,
"loss": 3.1254,
"step": 21300
},
{
"epoch": 0.18,
"learning_rate": 0.00024680050608729163,
"loss": 3.1293,
"step": 21400
},
{
"epoch": 0.18,
"learning_rate": 0.00024654913822718625,
"loss": 3.1278,
"step": 21500
},
{
"epoch": 0.18,
"learning_rate": 0.0002462977703670808,
"loss": 3.1216,
"step": 21600
},
{
"epoch": 0.18,
"learning_rate": 0.00024604640250697543,
"loss": 3.1281,
"step": 21700
},
{
"epoch": 0.18,
"learning_rate": 0.0002457975483254711,
"loss": 3.1182,
"step": 21800
},
{
"epoch": 0.18,
"learning_rate": 0.00024554618046536566,
"loss": 3.1231,
"step": 21900
},
{
"epoch": 0.18,
"learning_rate": 0.0002452948126052603,
"loss": 3.1238,
"step": 22000
},
{
"epoch": 0.18,
"eval_accuracy": 0.42022504408774863,
"eval_loss": 3.1434154510498047,
"eval_runtime": 36.9095,
"eval_samples_per_second": 303.878,
"eval_steps_per_second": 2.547,
"step": 22000
},
{
"epoch": 0.18,
"learning_rate": 0.00024504344474515484,
"loss": 3.1249,
"step": 22100
},
{
"epoch": 0.19,
"learning_rate": 0.00024479207688504946,
"loss": 3.1316,
"step": 22200
},
{
"epoch": 0.19,
"learning_rate": 0.000244540709024944,
"loss": 3.1152,
"step": 22300
},
{
"epoch": 0.19,
"learning_rate": 0.00024428934116483864,
"loss": 3.1204,
"step": 22400
},
{
"epoch": 0.19,
"learning_rate": 0.00024403797330473323,
"loss": 3.1237,
"step": 22500
},
{
"epoch": 0.19,
"learning_rate": 0.00024378660544462782,
"loss": 3.1256,
"step": 22600
},
{
"epoch": 0.19,
"learning_rate": 0.00024353523758452241,
"loss": 3.1272,
"step": 22700
},
{
"epoch": 0.19,
"learning_rate": 0.000243283869724417,
"loss": 3.12,
"step": 22800
},
{
"epoch": 0.19,
"learning_rate": 0.00024303250186431162,
"loss": 3.1182,
"step": 22900
},
{
"epoch": 0.19,
"learning_rate": 0.0002427811340042062,
"loss": 3.1224,
"step": 23000
},
{
"epoch": 0.19,
"eval_accuracy": 0.42017309355588756,
"eval_loss": 3.1407454013824463,
"eval_runtime": 36.6142,
"eval_samples_per_second": 306.329,
"eval_steps_per_second": 2.567,
"step": 23000
},
{
"epoch": 0.19,
"learning_rate": 0.0002425297661441008,
"loss": 3.1174,
"step": 23100
},
{
"epoch": 0.19,
"learning_rate": 0.00024227839828399537,
"loss": 3.1199,
"step": 23200
},
{
"epoch": 0.19,
"learning_rate": 0.00024202703042389,
"loss": 3.1147,
"step": 23300
},
{
"epoch": 0.2,
"learning_rate": 0.00024177566256378458,
"loss": 3.1201,
"step": 23400
},
{
"epoch": 0.2,
"learning_rate": 0.00024152429470367917,
"loss": 3.1231,
"step": 23500
},
{
"epoch": 0.2,
"learning_rate": 0.00024127544052217483,
"loss": 3.1172,
"step": 23600
},
{
"epoch": 0.2,
"learning_rate": 0.0002410240726620694,
"loss": 3.1176,
"step": 23700
},
{
"epoch": 0.2,
"learning_rate": 0.00024077521848056506,
"loss": 3.1119,
"step": 23800
},
{
"epoch": 0.2,
"learning_rate": 0.00024052385062045965,
"loss": 3.1212,
"step": 23900
},
{
"epoch": 0.2,
"learning_rate": 0.00024027248276035424,
"loss": 3.1183,
"step": 24000
},
{
"epoch": 0.2,
"eval_accuracy": 0.4208589809832972,
"eval_loss": 3.137460947036743,
"eval_runtime": 37.115,
"eval_samples_per_second": 302.196,
"eval_steps_per_second": 2.533,
"step": 24000
},
{
"epoch": 0.2,
"learning_rate": 0.00024002111490024883,
"loss": 3.1287,
"step": 24100
},
{
"epoch": 0.2,
"learning_rate": 0.00023976974704014342,
"loss": 3.1157,
"step": 24200
},
{
"epoch": 0.2,
"learning_rate": 0.00023951837918003804,
"loss": 3.1199,
"step": 24300
},
{
"epoch": 0.2,
"learning_rate": 0.0002392670113199326,
"loss": 3.1162,
"step": 24400
},
{
"epoch": 0.2,
"learning_rate": 0.00023901564345982722,
"loss": 3.1179,
"step": 24500
},
{
"epoch": 0.21,
"learning_rate": 0.0002387642755997218,
"loss": 3.1214,
"step": 24600
},
{
"epoch": 0.21,
"learning_rate": 0.00023851290773961638,
"loss": 3.1138,
"step": 24700
},
{
"epoch": 0.21,
"learning_rate": 0.000238261539879511,
"loss": 3.1117,
"step": 24800
},
{
"epoch": 0.21,
"learning_rate": 0.00023801017201940556,
"loss": 3.1117,
"step": 24900
},
{
"epoch": 0.21,
"learning_rate": 0.00023775880415930018,
"loss": 3.1131,
"step": 25000
},
{
"epoch": 0.21,
"eval_accuracy": 0.4210050040998798,
"eval_loss": 3.1347129344940186,
"eval_runtime": 36.8178,
"eval_samples_per_second": 304.635,
"eval_steps_per_second": 2.553,
"step": 25000
},
{
"epoch": 0.21,
"learning_rate": 0.00023750743629919474,
"loss": 3.118,
"step": 25100
},
{
"epoch": 0.21,
"learning_rate": 0.00023725606843908936,
"loss": 3.1158,
"step": 25200
},
{
"epoch": 0.21,
"learning_rate": 0.00023700470057898395,
"loss": 3.1178,
"step": 25300
},
{
"epoch": 0.21,
"learning_rate": 0.00023675333271887855,
"loss": 3.1019,
"step": 25400
},
{
"epoch": 0.21,
"learning_rate": 0.00023650196485877314,
"loss": 3.1105,
"step": 25500
},
{
"epoch": 0.21,
"learning_rate": 0.00023625059699866773,
"loss": 3.1158,
"step": 25600
},
{
"epoch": 0.21,
"learning_rate": 0.00023599922913856232,
"loss": 3.1166,
"step": 25700
},
{
"epoch": 0.22,
"learning_rate": 0.00023574786127845694,
"loss": 3.1172,
"step": 25800
},
{
"epoch": 0.22,
"learning_rate": 0.0002354964934183515,
"loss": 3.1233,
"step": 25900
},
{
"epoch": 0.22,
"learning_rate": 0.00023524763923684716,
"loss": 3.1106,
"step": 26000
},
{
"epoch": 0.22,
"eval_accuracy": 0.42156382333449405,
"eval_loss": 3.131035566329956,
"eval_runtime": 36.1307,
"eval_samples_per_second": 310.428,
"eval_steps_per_second": 2.602,
"step": 26000
},
{
"epoch": 0.22,
"learning_rate": 0.00023499627137674175,
"loss": 3.1186,
"step": 26100
},
{
"epoch": 0.22,
"learning_rate": 0.00023474490351663635,
"loss": 3.1069,
"step": 26200
},
{
"epoch": 0.22,
"learning_rate": 0.00023449353565653094,
"loss": 3.114,
"step": 26300
},
{
"epoch": 0.22,
"learning_rate": 0.00023424216779642553,
"loss": 3.114,
"step": 26400
},
{
"epoch": 0.22,
"learning_rate": 0.0002339907999363201,
"loss": 3.1072,
"step": 26500
},
{
"epoch": 0.22,
"learning_rate": 0.0002337394320762147,
"loss": 3.1141,
"step": 26600
},
{
"epoch": 0.22,
"learning_rate": 0.0002334880642161093,
"loss": 3.1125,
"step": 26700
},
{
"epoch": 0.22,
"learning_rate": 0.0002332366963560039,
"loss": 3.1202,
"step": 26800
},
{
"epoch": 0.23,
"learning_rate": 0.00023298532849589849,
"loss": 3.1177,
"step": 26900
},
{
"epoch": 0.23,
"learning_rate": 0.0002327339606357931,
"loss": 3.114,
"step": 27000
},
{
"epoch": 0.23,
"eval_accuracy": 0.42156241926606536,
"eval_loss": 3.129709482192993,
"eval_runtime": 36.5135,
"eval_samples_per_second": 307.174,
"eval_steps_per_second": 2.574,
"step": 27000
},
{
"epoch": 0.23,
"learning_rate": 0.00023248259277568767,
"loss": 3.1107,
"step": 27100
},
{
"epoch": 0.23,
"learning_rate": 0.00023223122491558229,
"loss": 3.1111,
"step": 27200
},
{
"epoch": 0.23,
"learning_rate": 0.00023197985705547685,
"loss": 3.106,
"step": 27300
},
{
"epoch": 0.23,
"learning_rate": 0.00023172848919537147,
"loss": 3.1081,
"step": 27400
},
{
"epoch": 0.23,
"learning_rate": 0.00023147712133526606,
"loss": 3.1077,
"step": 27500
},
{
"epoch": 0.23,
"learning_rate": 0.00023122575347516065,
"loss": 3.116,
"step": 27600
},
{
"epoch": 0.23,
"learning_rate": 0.00023097438561505524,
"loss": 3.1168,
"step": 27700
},
{
"epoch": 0.23,
"learning_rate": 0.00023072301775494983,
"loss": 3.1137,
"step": 27800
},
{
"epoch": 0.23,
"learning_rate": 0.00023047164989484442,
"loss": 3.1065,
"step": 27900
},
{
"epoch": 0.23,
"learning_rate": 0.00023022028203473904,
"loss": 3.1083,
"step": 28000
},
{
"epoch": 0.23,
"eval_accuracy": 0.42211211205589316,
"eval_loss": 3.1262805461883545,
"eval_runtime": 36.106,
"eval_samples_per_second": 310.641,
"eval_steps_per_second": 2.603,
"step": 28000
},
{
"epoch": 0.24,
"learning_rate": 0.0002299689141746336,
"loss": 3.1193,
"step": 28100
},
{
"epoch": 0.24,
"learning_rate": 0.00022972257367173034,
"loss": 3.0997,
"step": 28200
},
{
"epoch": 0.24,
"learning_rate": 0.0002294712058116249,
"loss": 3.1013,
"step": 28300
},
{
"epoch": 0.24,
"learning_rate": 0.00022921983795151952,
"loss": 3.1049,
"step": 28400
},
{
"epoch": 0.24,
"learning_rate": 0.00022896847009141409,
"loss": 3.1152,
"step": 28500
},
{
"epoch": 0.24,
"learning_rate": 0.00022871710223130868,
"loss": 3.1077,
"step": 28600
},
{
"epoch": 0.24,
"learning_rate": 0.00022846573437120327,
"loss": 3.1146,
"step": 28700
},
{
"epoch": 0.24,
"learning_rate": 0.00022821436651109786,
"loss": 3.1054,
"step": 28800
},
{
"epoch": 0.24,
"learning_rate": 0.00022796299865099248,
"loss": 3.1087,
"step": 28900
},
{
"epoch": 0.24,
"learning_rate": 0.00022771163079088704,
"loss": 3.1045,
"step": 29000
},
{
"epoch": 0.24,
"eval_accuracy": 0.4221415974928954,
"eval_loss": 3.124873161315918,
"eval_runtime": 37.1734,
"eval_samples_per_second": 301.721,
"eval_steps_per_second": 2.529,
"step": 29000
},
{
"epoch": 0.24,
"learning_rate": 0.0002274627766093827,
"loss": 3.1024,
"step": 29100
},
{
"epoch": 0.24,
"learning_rate": 0.0002272114087492773,
"loss": 3.0938,
"step": 29200
},
{
"epoch": 0.25,
"learning_rate": 0.00022696004088917189,
"loss": 3.1049,
"step": 29300
},
{
"epoch": 0.25,
"learning_rate": 0.00022670867302906648,
"loss": 3.109,
"step": 29400
},
{
"epoch": 0.25,
"learning_rate": 0.00022645730516896107,
"loss": 3.1033,
"step": 29500
},
{
"epoch": 0.25,
"learning_rate": 0.0002262059373088557,
"loss": 3.1066,
"step": 29600
},
{
"epoch": 0.25,
"learning_rate": 0.00022595456944875025,
"loss": 3.1087,
"step": 29700
},
{
"epoch": 0.25,
"learning_rate": 0.00022570320158864487,
"loss": 3.101,
"step": 29800
},
{
"epoch": 0.25,
"learning_rate": 0.00022545183372853943,
"loss": 3.1137,
"step": 29900
},
{
"epoch": 0.25,
"learning_rate": 0.00022520046586843405,
"loss": 3.1084,
"step": 30000
},
{
"epoch": 0.25,
"eval_accuracy": 0.4223374650386961,
"eval_loss": 3.1216838359832764,
"eval_runtime": 39.4599,
"eval_samples_per_second": 284.238,
"eval_steps_per_second": 2.382,
"step": 30000
},
{
"epoch": 0.25,
"learning_rate": 0.00022494909800832864,
"loss": 3.1006,
"step": 30100
},
{
"epoch": 0.25,
"learning_rate": 0.00022469773014822323,
"loss": 3.1045,
"step": 30200
},
{
"epoch": 0.25,
"learning_rate": 0.00022444636228811783,
"loss": 3.1001,
"step": 30300
},
{
"epoch": 0.25,
"learning_rate": 0.00022419499442801244,
"loss": 3.0988,
"step": 30400
},
{
"epoch": 0.26,
"learning_rate": 0.000223943626567907,
"loss": 3.0981,
"step": 30500
},
{
"epoch": 0.26,
"learning_rate": 0.00022369477238640264,
"loss": 3.1027,
"step": 30600
},
{
"epoch": 0.26,
"learning_rate": 0.00022344340452629723,
"loss": 3.1046,
"step": 30700
},
{
"epoch": 0.26,
"learning_rate": 0.00022319203666619185,
"loss": 3.1025,
"step": 30800
},
{
"epoch": 0.26,
"learning_rate": 0.00022294066880608642,
"loss": 3.1025,
"step": 30900
},
{
"epoch": 0.26,
"learning_rate": 0.00022268930094598103,
"loss": 3.097,
"step": 31000
},
{
"epoch": 0.26,
"eval_accuracy": 0.42269550248800924,
"eval_loss": 3.1202731132507324,
"eval_runtime": 36.6594,
"eval_samples_per_second": 305.952,
"eval_steps_per_second": 2.564,
"step": 31000
},
{
"epoch": 0.26,
"learning_rate": 0.0002224379330858756,
"loss": 3.104,
"step": 31100
},
{
"epoch": 0.26,
"learning_rate": 0.00022218656522577022,
"loss": 3.0977,
"step": 31200
},
{
"epoch": 0.26,
"learning_rate": 0.0002219351973656648,
"loss": 3.1121,
"step": 31300
},
{
"epoch": 0.26,
"learning_rate": 0.0002216838295055594,
"loss": 3.1011,
"step": 31400
},
{
"epoch": 0.26,
"learning_rate": 0.000221432461645454,
"loss": 3.0963,
"step": 31500
},
{
"epoch": 0.26,
"learning_rate": 0.00022118109378534858,
"loss": 3.1082,
"step": 31600
},
{
"epoch": 0.27,
"learning_rate": 0.00022092972592524317,
"loss": 3.0994,
"step": 31700
},
{
"epoch": 0.27,
"learning_rate": 0.0002206783580651378,
"loss": 3.0957,
"step": 31800
},
{
"epoch": 0.27,
"learning_rate": 0.00022042699020503236,
"loss": 3.0947,
"step": 31900
},
{
"epoch": 0.27,
"learning_rate": 0.00022017562234492697,
"loss": 3.0926,
"step": 32000
},
{
"epoch": 0.27,
"eval_accuracy": 0.42268707807743716,
"eval_loss": 3.119593381881714,
"eval_runtime": 37.8215,
"eval_samples_per_second": 296.551,
"eval_steps_per_second": 2.485,
"step": 32000
},
{
"epoch": 0.27,
"learning_rate": 0.00021992425448482154,
"loss": 3.0955,
"step": 32100
},
{
"epoch": 0.27,
"learning_rate": 0.00021967288662471616,
"loss": 3.0973,
"step": 32200
},
{
"epoch": 0.27,
"learning_rate": 0.00021942151876461075,
"loss": 3.1098,
"step": 32300
},
{
"epoch": 0.27,
"learning_rate": 0.0002191701509045053,
"loss": 3.1007,
"step": 32400
},
{
"epoch": 0.27,
"learning_rate": 0.00021891878304439993,
"loss": 3.0992,
"step": 32500
},
{
"epoch": 0.27,
"learning_rate": 0.0002186674151842945,
"loss": 3.1029,
"step": 32600
},
{
"epoch": 0.27,
"learning_rate": 0.00021841604732418911,
"loss": 3.0947,
"step": 32700
},
{
"epoch": 0.27,
"learning_rate": 0.00021816719314268475,
"loss": 3.0941,
"step": 32800
},
{
"epoch": 0.28,
"learning_rate": 0.00021791582528257934,
"loss": 3.1004,
"step": 32900
},
{
"epoch": 0.28,
"learning_rate": 0.00021766445742247396,
"loss": 3.1003,
"step": 33000
},
{
"epoch": 0.28,
"eval_accuracy": 0.4228331011940198,
"eval_loss": 3.1163218021392822,
"eval_runtime": 37.158,
"eval_samples_per_second": 301.846,
"eval_steps_per_second": 2.53,
"step": 33000
},
{
"epoch": 0.28,
"learning_rate": 0.00021741308956236852,
"loss": 3.0986,
"step": 33100
},
{
"epoch": 0.28,
"learning_rate": 0.00021716172170226314,
"loss": 3.0999,
"step": 33200
},
{
"epoch": 0.28,
"learning_rate": 0.0002169103538421577,
"loss": 3.0994,
"step": 33300
},
{
"epoch": 0.28,
"learning_rate": 0.00021665898598205232,
"loss": 3.0976,
"step": 33400
},
{
"epoch": 0.28,
"learning_rate": 0.00021640761812194691,
"loss": 3.0949,
"step": 33500
},
{
"epoch": 0.28,
"learning_rate": 0.0002161562502618415,
"loss": 3.0923,
"step": 33600
},
{
"epoch": 0.28,
"learning_rate": 0.0002159048824017361,
"loss": 3.0909,
"step": 33700
},
{
"epoch": 0.28,
"learning_rate": 0.0002156535145416307,
"loss": 3.0944,
"step": 33800
},
{
"epoch": 0.28,
"learning_rate": 0.00021540214668152528,
"loss": 3.0997,
"step": 33900
},
{
"epoch": 0.28,
"learning_rate": 0.0002151507788214199,
"loss": 3.097,
"step": 34000
},
{
"epoch": 0.28,
"eval_accuracy": 0.4235625147427185,
"eval_loss": 3.1130168437957764,
"eval_runtime": 36.3501,
"eval_samples_per_second": 308.555,
"eval_steps_per_second": 2.586,
"step": 34000
},
{
"epoch": 0.29,
"learning_rate": 0.00021489941096131446,
"loss": 3.0878,
"step": 34100
},
{
"epoch": 0.29,
"learning_rate": 0.00021464804310120905,
"loss": 3.094,
"step": 34200
},
{
"epoch": 0.29,
"learning_rate": 0.00021439667524110364,
"loss": 3.0976,
"step": 34300
},
{
"epoch": 0.29,
"learning_rate": 0.00021414530738099824,
"loss": 3.0959,
"step": 34400
},
{
"epoch": 0.29,
"learning_rate": 0.00021389393952089285,
"loss": 3.098,
"step": 34500
},
{
"epoch": 0.29,
"learning_rate": 0.00021364257166078742,
"loss": 3.0891,
"step": 34600
},
{
"epoch": 0.29,
"learning_rate": 0.00021339120380068204,
"loss": 3.0881,
"step": 34700
},
{
"epoch": 0.29,
"learning_rate": 0.0002131398359405766,
"loss": 3.0934,
"step": 34800
},
{
"epoch": 0.29,
"learning_rate": 0.00021288846808047122,
"loss": 3.0997,
"step": 34900
},
{
"epoch": 0.29,
"learning_rate": 0.00021263961389896685,
"loss": 3.0934,
"step": 35000
},
{
"epoch": 0.29,
"eval_accuracy": 0.4233083783571276,
"eval_loss": 3.112696886062622,
"eval_runtime": 36.2826,
"eval_samples_per_second": 309.129,
"eval_steps_per_second": 2.591,
"step": 35000
},
{
"epoch": 0.29,
"learning_rate": 0.00021238824603886144,
"loss": 3.0886,
"step": 35100
},
{
"epoch": 0.29,
"learning_rate": 0.00021213687817875606,
"loss": 3.0891,
"step": 35200
},
{
"epoch": 0.3,
"learning_rate": 0.00021188551031865063,
"loss": 3.0952,
"step": 35300
},
{
"epoch": 0.3,
"learning_rate": 0.00021163414245854525,
"loss": 3.0869,
"step": 35400
},
{
"epoch": 0.3,
"learning_rate": 0.0002113827745984398,
"loss": 3.0905,
"step": 35500
},
{
"epoch": 0.3,
"learning_rate": 0.00021113140673833443,
"loss": 3.0939,
"step": 35600
},
{
"epoch": 0.3,
"learning_rate": 0.00021088255255683006,
"loss": 3.0958,
"step": 35700
},
{
"epoch": 0.3,
"learning_rate": 0.00021063118469672465,
"loss": 3.0882,
"step": 35800
},
{
"epoch": 0.3,
"learning_rate": 0.00021037981683661927,
"loss": 3.0852,
"step": 35900
},
{
"epoch": 0.3,
"learning_rate": 0.00021012844897651384,
"loss": 3.0957,
"step": 36000
},
{
"epoch": 0.3,
"eval_accuracy": 0.4237239826120166,
"eval_loss": 3.110541820526123,
"eval_runtime": 37.0216,
"eval_samples_per_second": 302.958,
"eval_steps_per_second": 2.539,
"step": 36000
},
{
"epoch": 0.3,
"learning_rate": 0.00020987708111640845,
"loss": 3.0968,
"step": 36100
},
{
"epoch": 0.3,
"learning_rate": 0.00020962571325630302,
"loss": 3.0909,
"step": 36200
},
{
"epoch": 0.3,
"learning_rate": 0.00020937434539619764,
"loss": 3.0826,
"step": 36300
},
{
"epoch": 0.3,
"learning_rate": 0.00020912297753609223,
"loss": 3.086,
"step": 36400
},
{
"epoch": 0.31,
"learning_rate": 0.0002088716096759868,
"loss": 3.091,
"step": 36500
},
{
"epoch": 0.31,
"learning_rate": 0.00020862275549448245,
"loss": 3.0865,
"step": 36600
},
{
"epoch": 0.31,
"learning_rate": 0.00020837138763437704,
"loss": 3.092,
"step": 36700
},
{
"epoch": 0.31,
"learning_rate": 0.00020812001977427164,
"loss": 3.0916,
"step": 36800
},
{
"epoch": 0.31,
"learning_rate": 0.00020786865191416623,
"loss": 3.0924,
"step": 36900
},
{
"epoch": 0.31,
"learning_rate": 0.00020761728405406082,
"loss": 3.0915,
"step": 37000
},
{
"epoch": 0.31,
"eval_accuracy": 0.42398513933975085,
"eval_loss": 3.10992169380188,
"eval_runtime": 36.5153,
"eval_samples_per_second": 307.159,
"eval_steps_per_second": 2.574,
"step": 37000
},
{
"epoch": 0.31,
"learning_rate": 0.00020736591619395544,
"loss": 3.0841,
"step": 37100
},
{
"epoch": 0.31,
"learning_rate": 0.00020711454833385,
"loss": 3.088,
"step": 37200
},
{
"epoch": 0.31,
"learning_rate": 0.00020686318047374462,
"loss": 3.0941,
"step": 37300
},
{
"epoch": 0.31,
"learning_rate": 0.00020661181261363918,
"loss": 3.0898,
"step": 37400
},
{
"epoch": 0.31,
"learning_rate": 0.0002063604447535338,
"loss": 3.0885,
"step": 37500
},
{
"epoch": 0.31,
"learning_rate": 0.0002061090768934284,
"loss": 3.0918,
"step": 37600
},
{
"epoch": 0.32,
"learning_rate": 0.00020585770903332298,
"loss": 3.0962,
"step": 37700
},
{
"epoch": 0.32,
"learning_rate": 0.00020560634117321758,
"loss": 3.096,
"step": 37800
},
{
"epoch": 0.32,
"learning_rate": 0.0002053549733131122,
"loss": 3.0846,
"step": 37900
},
{
"epoch": 0.32,
"learning_rate": 0.00020510611913160783,
"loss": 3.0908,
"step": 38000
},
{
"epoch": 0.32,
"eval_accuracy": 0.4245425545059364,
"eval_loss": 3.1069419384002686,
"eval_runtime": 37.2669,
"eval_samples_per_second": 300.964,
"eval_steps_per_second": 2.522,
"step": 38000
},
{
"epoch": 0.32,
"learning_rate": 0.0002048547512715024,
"loss": 3.0851,
"step": 38100
},
{
"epoch": 0.32,
"learning_rate": 0.000204603383411397,
"loss": 3.0859,
"step": 38200
},
{
"epoch": 0.32,
"learning_rate": 0.0002043520155512916,
"loss": 3.0877,
"step": 38300
},
{
"epoch": 0.32,
"learning_rate": 0.0002041006476911862,
"loss": 3.08,
"step": 38400
},
{
"epoch": 0.32,
"learning_rate": 0.00020384927983108079,
"loss": 3.0872,
"step": 38500
},
{
"epoch": 0.32,
"learning_rate": 0.00020359791197097535,
"loss": 3.0934,
"step": 38600
},
{
"epoch": 0.32,
"learning_rate": 0.00020334654411086997,
"loss": 3.0898,
"step": 38700
},
{
"epoch": 0.32,
"learning_rate": 0.00020309517625076456,
"loss": 3.091,
"step": 38800
},
{
"epoch": 0.33,
"learning_rate": 0.00020284380839065915,
"loss": 3.0903,
"step": 38900
},
{
"epoch": 0.33,
"learning_rate": 0.00020259244053055374,
"loss": 3.0764,
"step": 39000
},
{
"epoch": 0.33,
"eval_accuracy": 0.42455589315600883,
"eval_loss": 3.104147434234619,
"eval_runtime": 36.3216,
"eval_samples_per_second": 308.797,
"eval_steps_per_second": 2.588,
"step": 39000
},
{
"epoch": 0.33,
"learning_rate": 0.00020234107267044833,
"loss": 3.0781,
"step": 39100
},
{
"epoch": 0.33,
"learning_rate": 0.00020208970481034292,
"loss": 3.0805,
"step": 39200
},
{
"epoch": 0.33,
"learning_rate": 0.00020183833695023754,
"loss": 3.0861,
"step": 39300
},
{
"epoch": 0.33,
"learning_rate": 0.0002015869690901321,
"loss": 3.0906,
"step": 39400
},
{
"epoch": 0.33,
"learning_rate": 0.00020133560123002672,
"loss": 3.0837,
"step": 39500
},
{
"epoch": 0.33,
"learning_rate": 0.0002010842333699213,
"loss": 3.0827,
"step": 39600
},
{
"epoch": 0.33,
"learning_rate": 0.0002008328655098159,
"loss": 3.082,
"step": 39700
},
{
"epoch": 0.33,
"learning_rate": 0.0002005814976497105,
"loss": 3.0838,
"step": 39800
},
{
"epoch": 0.33,
"learning_rate": 0.0002003301297896051,
"loss": 3.0834,
"step": 39900
},
{
"epoch": 0.33,
"learning_rate": 0.00020007876192949968,
"loss": 3.0855,
"step": 40000
},
{
"epoch": 0.33,
"eval_accuracy": 0.42506837813247667,
"eval_loss": 3.1023147106170654,
"eval_runtime": 36.3302,
"eval_samples_per_second": 308.724,
"eval_steps_per_second": 2.587,
"step": 40000
},
{
"epoch": 0.34,
"learning_rate": 0.00019982990774799532,
"loss": 3.0823,
"step": 40100
},
{
"epoch": 0.34,
"learning_rate": 0.00019957853988788993,
"loss": 3.0877,
"step": 40200
},
{
"epoch": 0.34,
"learning_rate": 0.0001993271720277845,
"loss": 3.0891,
"step": 40300
},
{
"epoch": 0.34,
"learning_rate": 0.0001990758041676791,
"loss": 3.0847,
"step": 40400
},
{
"epoch": 0.34,
"learning_rate": 0.0001988244363075737,
"loss": 3.0769,
"step": 40500
},
{
"epoch": 0.34,
"learning_rate": 0.00019857306844746827,
"loss": 3.0842,
"step": 40600
},
{
"epoch": 0.34,
"learning_rate": 0.0001983217005873629,
"loss": 3.0771,
"step": 40700
},
{
"epoch": 0.34,
"learning_rate": 0.00019807033272725745,
"loss": 3.0878,
"step": 40800
},
{
"epoch": 0.34,
"learning_rate": 0.00019781896486715207,
"loss": 3.0876,
"step": 40900
},
{
"epoch": 0.34,
"learning_rate": 0.00019756759700704666,
"loss": 3.0782,
"step": 41000
},
{
"epoch": 0.34,
"eval_accuracy": 0.42481002954159974,
"eval_loss": 3.100797414779663,
"eval_runtime": 37.0564,
"eval_samples_per_second": 302.674,
"eval_steps_per_second": 2.537,
"step": 41000
},
{
"epoch": 0.34,
"learning_rate": 0.00019731622914694126,
"loss": 3.0788,
"step": 41100
},
{
"epoch": 0.34,
"learning_rate": 0.00019706486128683585,
"loss": 3.0811,
"step": 41200
},
{
"epoch": 0.35,
"learning_rate": 0.00019681349342673044,
"loss": 3.0799,
"step": 41300
},
{
"epoch": 0.35,
"learning_rate": 0.00019656212556662503,
"loss": 3.0737,
"step": 41400
},
{
"epoch": 0.35,
"learning_rate": 0.00019631075770651965,
"loss": 3.0815,
"step": 41500
},
{
"epoch": 0.35,
"learning_rate": 0.0001960593898464142,
"loss": 3.0885,
"step": 41600
},
{
"epoch": 0.35,
"learning_rate": 0.00019580802198630883,
"loss": 3.0785,
"step": 41700
},
{
"epoch": 0.35,
"learning_rate": 0.0001955566541262034,
"loss": 3.0738,
"step": 41800
},
{
"epoch": 0.35,
"learning_rate": 0.000195305286266098,
"loss": 3.0826,
"step": 41900
},
{
"epoch": 0.35,
"learning_rate": 0.0001950539184059926,
"loss": 3.0821,
"step": 42000
},
{
"epoch": 0.35,
"eval_accuracy": 0.4254973210374381,
"eval_loss": 3.0979230403900146,
"eval_runtime": 36.8694,
"eval_samples_per_second": 304.209,
"eval_steps_per_second": 2.55,
"step": 42000
},
{
"epoch": 0.35,
"learning_rate": 0.00019480255054588717,
"loss": 3.0689,
"step": 42100
},
{
"epoch": 0.35,
"learning_rate": 0.00019455369636438286,
"loss": 3.0767,
"step": 42200
},
{
"epoch": 0.35,
"learning_rate": 0.0001943048421828785,
"loss": 3.0768,
"step": 42300
},
{
"epoch": 0.35,
"learning_rate": 0.00019405598800137415,
"loss": 3.0746,
"step": 42400
},
{
"epoch": 0.36,
"learning_rate": 0.00019380462014126872,
"loss": 3.0812,
"step": 42500
},
{
"epoch": 0.36,
"learning_rate": 0.00019355325228116333,
"loss": 3.0721,
"step": 42600
},
{
"epoch": 0.36,
"learning_rate": 0.0001933018844210579,
"loss": 3.0701,
"step": 42700
},
{
"epoch": 0.36,
"learning_rate": 0.0001930505165609525,
"loss": 3.0769,
"step": 42800
},
{
"epoch": 0.36,
"learning_rate": 0.00019279914870084708,
"loss": 3.0827,
"step": 42900
},
{
"epoch": 0.36,
"learning_rate": 0.00019254778084074167,
"loss": 3.075,
"step": 43000
},
{
"epoch": 0.36,
"eval_accuracy": 0.425449582710863,
"eval_loss": 3.0971269607543945,
"eval_runtime": 36.1836,
"eval_samples_per_second": 309.975,
"eval_steps_per_second": 2.598,
"step": 43000
},
{
"epoch": 0.36,
"learning_rate": 0.0001922964129806363,
"loss": 3.0742,
"step": 43100
},
{
"epoch": 0.36,
"learning_rate": 0.00019204504512053086,
"loss": 3.0804,
"step": 43200
},
{
"epoch": 0.36,
"learning_rate": 0.00019179367726042547,
"loss": 3.0788,
"step": 43300
},
{
"epoch": 0.36,
"learning_rate": 0.00019154230940032004,
"loss": 3.078,
"step": 43400
},
{
"epoch": 0.36,
"learning_rate": 0.00019129094154021466,
"loss": 3.0729,
"step": 43500
},
{
"epoch": 0.36,
"learning_rate": 0.00019103957368010925,
"loss": 3.0704,
"step": 43600
},
{
"epoch": 0.37,
"learning_rate": 0.00019078820582000384,
"loss": 3.0793,
"step": 43700
},
{
"epoch": 0.37,
"learning_rate": 0.00019053683795989843,
"loss": 3.0789,
"step": 43800
},
{
"epoch": 0.37,
"learning_rate": 0.00019028547009979305,
"loss": 3.0835,
"step": 43900
},
{
"epoch": 0.37,
"learning_rate": 0.00019003661591828868,
"loss": 3.0794,
"step": 44000
},
{
"epoch": 0.37,
"eval_accuracy": 0.4256580868725218,
"eval_loss": 3.0950751304626465,
"eval_runtime": 36.1829,
"eval_samples_per_second": 309.98,
"eval_steps_per_second": 2.598,
"step": 44000
},
{
"epoch": 0.37,
"learning_rate": 0.00018978524805818325,
"loss": 3.0746,
"step": 44100
},
{
"epoch": 0.37,
"learning_rate": 0.00018953388019807787,
"loss": 3.0778,
"step": 44200
},
{
"epoch": 0.37,
"learning_rate": 0.00018928251233797246,
"loss": 3.0743,
"step": 44300
},
{
"epoch": 0.37,
"learning_rate": 0.00018903114447786705,
"loss": 3.0822,
"step": 44400
},
{
"epoch": 0.37,
"learning_rate": 0.00018877977661776164,
"loss": 3.0782,
"step": 44500
},
{
"epoch": 0.37,
"learning_rate": 0.0001885284087576562,
"loss": 3.0705,
"step": 44600
},
{
"epoch": 0.37,
"learning_rate": 0.00018827704089755082,
"loss": 3.0737,
"step": 44700
},
{
"epoch": 0.37,
"learning_rate": 0.0001880256730374454,
"loss": 3.0736,
"step": 44800
},
{
"epoch": 0.38,
"learning_rate": 0.00018777681885594105,
"loss": 3.0712,
"step": 44900
},
{
"epoch": 0.38,
"learning_rate": 0.00018752545099583567,
"loss": 3.0836,
"step": 45000
},
{
"epoch": 0.38,
"eval_accuracy": 0.42573460860188483,
"eval_loss": 3.0936806201934814,
"eval_runtime": 36.1343,
"eval_samples_per_second": 310.398,
"eval_steps_per_second": 2.601,
"step": 45000
},
{
"epoch": 0.38,
"learning_rate": 0.00018727408313573023,
"loss": 3.0763,
"step": 45100
},
{
"epoch": 0.38,
"learning_rate": 0.00018702271527562485,
"loss": 3.0831,
"step": 45200
},
{
"epoch": 0.38,
"learning_rate": 0.0001867713474155194,
"loss": 3.0768,
"step": 45300
},
{
"epoch": 0.38,
"learning_rate": 0.00018651997955541403,
"loss": 3.0686,
"step": 45400
},
{
"epoch": 0.38,
"learning_rate": 0.00018626861169530862,
"loss": 3.0766,
"step": 45500
},
{
"epoch": 0.38,
"learning_rate": 0.0001860172438352032,
"loss": 3.0721,
"step": 45600
},
{
"epoch": 0.38,
"learning_rate": 0.0001857658759750978,
"loss": 3.0812,
"step": 45700
},
{
"epoch": 0.38,
"learning_rate": 0.00018551450811499242,
"loss": 3.0853,
"step": 45800
},
{
"epoch": 0.38,
"learning_rate": 0.000185263140254887,
"loss": 3.0753,
"step": 45900
},
{
"epoch": 0.38,
"learning_rate": 0.0001850117723947816,
"loss": 3.0744,
"step": 46000
},
{
"epoch": 0.38,
"eval_accuracy": 0.42582517101553463,
"eval_loss": 3.092123508453369,
"eval_runtime": 36.2715,
"eval_samples_per_second": 309.224,
"eval_steps_per_second": 2.592,
"step": 46000
},
{
"epoch": 0.39,
"learning_rate": 0.00018476040453467617,
"loss": 3.077,
"step": 46100
},
{
"epoch": 0.39,
"learning_rate": 0.0001845090366745708,
"loss": 3.0822,
"step": 46200
},
{
"epoch": 0.39,
"learning_rate": 0.00018425766881446535,
"loss": 3.0791,
"step": 46300
},
{
"epoch": 0.39,
"learning_rate": 0.00018400630095435997,
"loss": 3.0776,
"step": 46400
},
{
"epoch": 0.39,
"learning_rate": 0.00018375493309425456,
"loss": 3.0781,
"step": 46500
},
{
"epoch": 0.39,
"learning_rate": 0.00018350356523414913,
"loss": 3.0756,
"step": 46600
},
{
"epoch": 0.39,
"learning_rate": 0.00018325219737404374,
"loss": 3.0739,
"step": 46700
},
{
"epoch": 0.39,
"learning_rate": 0.0001830008295139383,
"loss": 3.0697,
"step": 46800
},
{
"epoch": 0.39,
"learning_rate": 0.00018274946165383293,
"loss": 3.0747,
"step": 46900
},
{
"epoch": 0.39,
"learning_rate": 0.00018249809379372752,
"loss": 3.0692,
"step": 47000
},
{
"epoch": 0.39,
"eval_accuracy": 0.42626464443371115,
"eval_loss": 3.090735912322998,
"eval_runtime": 36.0323,
"eval_samples_per_second": 311.276,
"eval_steps_per_second": 2.609,
"step": 47000
},
{
"epoch": 0.39,
"learning_rate": 0.0001822467259336221,
"loss": 3.0701,
"step": 47100
},
{
"epoch": 0.39,
"learning_rate": 0.0001819953580735167,
"loss": 3.0706,
"step": 47200
},
{
"epoch": 0.4,
"learning_rate": 0.00018174399021341132,
"loss": 3.0734,
"step": 47300
},
{
"epoch": 0.4,
"learning_rate": 0.00018149262235330588,
"loss": 3.0719,
"step": 47400
},
{
"epoch": 0.4,
"learning_rate": 0.0001812412544932005,
"loss": 3.07,
"step": 47500
},
{
"epoch": 0.4,
"learning_rate": 0.00018098988663309507,
"loss": 3.0743,
"step": 47600
},
{
"epoch": 0.4,
"learning_rate": 0.00018074103245159073,
"loss": 3.0768,
"step": 47700
},
{
"epoch": 0.4,
"learning_rate": 0.00018048966459148532,
"loss": 3.0598,
"step": 47800
},
{
"epoch": 0.4,
"learning_rate": 0.0001802382967313799,
"loss": 3.0653,
"step": 47900
},
{
"epoch": 0.4,
"learning_rate": 0.00017998692887127453,
"loss": 3.0717,
"step": 48000
},
{
"epoch": 0.4,
"eval_accuracy": 0.42618812270434814,
"eval_loss": 3.0900797843933105,
"eval_runtime": 36.3,
"eval_samples_per_second": 308.981,
"eval_steps_per_second": 2.59,
"step": 48000
},
{
"epoch": 0.4,
"learning_rate": 0.0001797355610111691,
"loss": 3.0752,
"step": 48100
},
{
"epoch": 0.4,
"learning_rate": 0.0001794841931510637,
"loss": 3.0656,
"step": 48200
},
{
"epoch": 0.4,
"learning_rate": 0.00017923282529095827,
"loss": 3.0758,
"step": 48300
},
{
"epoch": 0.4,
"learning_rate": 0.00017898145743085287,
"loss": 3.0827,
"step": 48400
},
{
"epoch": 0.41,
"learning_rate": 0.00017873008957074746,
"loss": 3.068,
"step": 48500
},
{
"epoch": 0.41,
"learning_rate": 0.00017847872171064205,
"loss": 3.0645,
"step": 48600
},
{
"epoch": 0.41,
"learning_rate": 0.00017822735385053667,
"loss": 3.0752,
"step": 48700
},
{
"epoch": 0.41,
"learning_rate": 0.00017797598599043123,
"loss": 3.0726,
"step": 48800
},
{
"epoch": 0.41,
"learning_rate": 0.00017772461813032585,
"loss": 3.0736,
"step": 48900
},
{
"epoch": 0.41,
"learning_rate": 0.00017747325027022041,
"loss": 3.0697,
"step": 49000
},
{
"epoch": 0.41,
"eval_accuracy": 0.42656862524852013,
"eval_loss": 3.0877325534820557,
"eval_runtime": 37.2501,
"eval_samples_per_second": 301.1,
"eval_steps_per_second": 2.523,
"step": 49000
},
{
"epoch": 0.41,
"learning_rate": 0.00017722188241011503,
"loss": 3.0779,
"step": 49100
},
{
"epoch": 0.41,
"learning_rate": 0.00017697051455000962,
"loss": 3.0736,
"step": 49200
},
{
"epoch": 0.41,
"learning_rate": 0.00017671914668990421,
"loss": 3.0657,
"step": 49300
},
{
"epoch": 0.41,
"learning_rate": 0.0001764677788297988,
"loss": 3.065,
"step": 49400
},
{
"epoch": 0.41,
"learning_rate": 0.00017621641096969342,
"loss": 3.0683,
"step": 49500
},
{
"epoch": 0.41,
"learning_rate": 0.000175965043109588,
"loss": 3.0656,
"step": 49600
},
{
"epoch": 0.42,
"learning_rate": 0.0001757136752494826,
"loss": 3.0714,
"step": 49700
},
{
"epoch": 0.42,
"learning_rate": 0.00017546482106797824,
"loss": 3.0804,
"step": 49800
},
{
"epoch": 0.42,
"learning_rate": 0.00017521345320787283,
"loss": 3.0636,
"step": 49900
},
{
"epoch": 0.42,
"learning_rate": 0.00017496208534776742,
"loss": 3.0689,
"step": 50000
},
{
"epoch": 0.42,
"eval_accuracy": 0.426702713783459,
"eval_loss": 3.0857808589935303,
"eval_runtime": 36.1585,
"eval_samples_per_second": 310.189,
"eval_steps_per_second": 2.6,
"step": 50000
},
{
"epoch": 0.42,
"learning_rate": 0.00017471071748766202,
"loss": 3.0627,
"step": 50100
},
{
"epoch": 0.42,
"learning_rate": 0.00017445934962755663,
"loss": 3.0655,
"step": 50200
},
{
"epoch": 0.42,
"learning_rate": 0.0001742079817674512,
"loss": 3.0711,
"step": 50300
},
{
"epoch": 0.42,
"learning_rate": 0.0001739566139073458,
"loss": 3.0684,
"step": 50400
},
{
"epoch": 0.42,
"learning_rate": 0.00017370775972584142,
"loss": 3.066,
"step": 50500
},
{
"epoch": 0.42,
"learning_rate": 0.00017345639186573604,
"loss": 3.0587,
"step": 50600
},
{
"epoch": 0.42,
"learning_rate": 0.0001732050240056306,
"loss": 3.0705,
"step": 50700
},
{
"epoch": 0.42,
"learning_rate": 0.00017295365614552522,
"loss": 3.0652,
"step": 50800
},
{
"epoch": 0.43,
"learning_rate": 0.0001727022882854198,
"loss": 3.0718,
"step": 50900
},
{
"epoch": 0.43,
"learning_rate": 0.0001724509204253144,
"loss": 3.067,
"step": 51000
},
{
"epoch": 0.43,
"eval_accuracy": 0.42674553787053365,
"eval_loss": 3.08451247215271,
"eval_runtime": 37.357,
"eval_samples_per_second": 300.238,
"eval_steps_per_second": 2.516,
"step": 51000
},
{
"epoch": 0.43,
"learning_rate": 0.000172199552565209,
"loss": 3.0652,
"step": 51100
},
{
"epoch": 0.43,
"learning_rate": 0.0001719481847051036,
"loss": 3.0697,
"step": 51200
},
{
"epoch": 0.43,
"learning_rate": 0.00017169681684499818,
"loss": 3.0699,
"step": 51300
},
{
"epoch": 0.43,
"learning_rate": 0.0001714454489848928,
"loss": 3.0656,
"step": 51400
},
{
"epoch": 0.43,
"learning_rate": 0.00017119408112478736,
"loss": 3.0579,
"step": 51500
},
{
"epoch": 0.43,
"learning_rate": 0.00017094271326468198,
"loss": 3.0586,
"step": 51600
},
{
"epoch": 0.43,
"learning_rate": 0.00017069134540457655,
"loss": 3.0725,
"step": 51700
},
{
"epoch": 0.43,
"learning_rate": 0.00017043997754447116,
"loss": 3.0713,
"step": 51800
},
{
"epoch": 0.43,
"learning_rate": 0.00017018860968436573,
"loss": 3.0674,
"step": 51900
},
{
"epoch": 0.43,
"learning_rate": 0.00016993724182426035,
"loss": 3.0635,
"step": 52000
},
{
"epoch": 0.43,
"eval_accuracy": 0.4271583339885653,
"eval_loss": 3.082775115966797,
"eval_runtime": 36.4468,
"eval_samples_per_second": 307.736,
"eval_steps_per_second": 2.579,
"step": 52000
},
{
"epoch": 0.44,
"learning_rate": 0.00016968587396415494,
"loss": 3.0589,
"step": 52100
},
{
"epoch": 0.44,
"learning_rate": 0.0001694345061040495,
"loss": 3.0656,
"step": 52200
},
{
"epoch": 0.44,
"learning_rate": 0.00016918313824394412,
"loss": 3.0657,
"step": 52300
},
{
"epoch": 0.44,
"learning_rate": 0.00016893177038383868,
"loss": 3.0622,
"step": 52400
},
{
"epoch": 0.44,
"learning_rate": 0.0001686804025237333,
"loss": 3.0627,
"step": 52500
},
{
"epoch": 0.44,
"learning_rate": 0.0001684290346636279,
"loss": 3.063,
"step": 52600
},
{
"epoch": 0.44,
"learning_rate": 0.00016817766680352249,
"loss": 3.0637,
"step": 52700
},
{
"epoch": 0.44,
"learning_rate": 0.00016792629894341708,
"loss": 3.0639,
"step": 52800
},
{
"epoch": 0.44,
"learning_rate": 0.0001676774447619127,
"loss": 3.0639,
"step": 52900
},
{
"epoch": 0.44,
"learning_rate": 0.00016742607690180733,
"loss": 3.0678,
"step": 53000
},
{
"epoch": 0.44,
"eval_accuracy": 0.4273408628842935,
"eval_loss": 3.0823299884796143,
"eval_runtime": 36.1917,
"eval_samples_per_second": 309.906,
"eval_steps_per_second": 2.597,
"step": 53000
},
{
"epoch": 0.44,
"learning_rate": 0.0001671747090417019,
"loss": 3.0582,
"step": 53100
},
{
"epoch": 0.45,
"learning_rate": 0.0001669233411815965,
"loss": 3.0708,
"step": 53200
},
{
"epoch": 0.45,
"learning_rate": 0.0001666719733214911,
"loss": 3.0692,
"step": 53300
},
{
"epoch": 0.45,
"learning_rate": 0.0001664206054613857,
"loss": 3.0671,
"step": 53400
},
{
"epoch": 0.45,
"learning_rate": 0.00016616923760128029,
"loss": 3.0662,
"step": 53500
},
{
"epoch": 0.45,
"learning_rate": 0.0001659178697411749,
"loss": 3.0653,
"step": 53600
},
{
"epoch": 0.45,
"learning_rate": 0.00016566650188106947,
"loss": 3.0669,
"step": 53700
},
{
"epoch": 0.45,
"learning_rate": 0.0001654151340209641,
"loss": 3.0552,
"step": 53800
},
{
"epoch": 0.45,
"learning_rate": 0.00016516376616085865,
"loss": 3.0569,
"step": 53900
},
{
"epoch": 0.45,
"learning_rate": 0.00016491239830075327,
"loss": 3.067,
"step": 54000
},
{
"epoch": 0.45,
"eval_accuracy": 0.4276448436991025,
"eval_loss": 3.0794825553894043,
"eval_runtime": 36.2802,
"eval_samples_per_second": 309.15,
"eval_steps_per_second": 2.591,
"step": 54000
},
{
"epoch": 0.45,
"learning_rate": 0.00016466103044064783,
"loss": 3.0623,
"step": 54100
},
{
"epoch": 0.45,
"learning_rate": 0.00016440966258054242,
"loss": 3.0612,
"step": 54200
},
{
"epoch": 0.45,
"learning_rate": 0.00016415829472043704,
"loss": 3.0588,
"step": 54300
},
{
"epoch": 0.46,
"learning_rate": 0.0001639069268603316,
"loss": 3.064,
"step": 54400
},
{
"epoch": 0.46,
"learning_rate": 0.00016365555900022623,
"loss": 3.0564,
"step": 54500
},
{
"epoch": 0.46,
"learning_rate": 0.0001634041911401208,
"loss": 3.0605,
"step": 54600
},
{
"epoch": 0.46,
"learning_rate": 0.00016315533695861645,
"loss": 3.0591,
"step": 54700
},
{
"epoch": 0.46,
"learning_rate": 0.00016290396909851104,
"loss": 3.0639,
"step": 54800
},
{
"epoch": 0.46,
"learning_rate": 0.00016265260123840563,
"loss": 3.0612,
"step": 54900
},
{
"epoch": 0.46,
"learning_rate": 0.00016240123337830025,
"loss": 3.0597,
"step": 55000
},
{
"epoch": 0.46,
"eval_accuracy": 0.4277283857706089,
"eval_loss": 3.078927516937256,
"eval_runtime": 36.9604,
"eval_samples_per_second": 303.46,
"eval_steps_per_second": 2.543,
"step": 55000
},
{
"epoch": 0.46,
"learning_rate": 0.00016214986551819482,
"loss": 3.0607,
"step": 55100
},
{
"epoch": 0.46,
"learning_rate": 0.00016190101133669048,
"loss": 3.0505,
"step": 55200
},
{
"epoch": 0.46,
"learning_rate": 0.00016164964347658507,
"loss": 3.0628,
"step": 55300
},
{
"epoch": 0.46,
"learning_rate": 0.00016139827561647966,
"loss": 3.0592,
"step": 55400
},
{
"epoch": 0.46,
"learning_rate": 0.00016114690775637428,
"loss": 3.0488,
"step": 55500
},
{
"epoch": 0.47,
"learning_rate": 0.00016089553989626884,
"loss": 3.0533,
"step": 55600
},
{
"epoch": 0.47,
"learning_rate": 0.00016064417203616346,
"loss": 3.0666,
"step": 55700
},
{
"epoch": 0.47,
"learning_rate": 0.00016039280417605803,
"loss": 3.0596,
"step": 55800
},
{
"epoch": 0.47,
"learning_rate": 0.00016014143631595264,
"loss": 3.0604,
"step": 55900
},
{
"epoch": 0.47,
"learning_rate": 0.0001598900684558472,
"loss": 3.0648,
"step": 56000
},
{
"epoch": 0.47,
"eval_accuracy": 0.4278596661686904,
"eval_loss": 3.0768725872039795,
"eval_runtime": 37.0258,
"eval_samples_per_second": 302.924,
"eval_steps_per_second": 2.539,
"step": 56000
},
{
"epoch": 0.47,
"learning_rate": 0.00015963870059574183,
"loss": 3.0614,
"step": 56100
},
{
"epoch": 0.47,
"learning_rate": 0.00015938733273563642,
"loss": 3.0541,
"step": 56200
},
{
"epoch": 0.47,
"learning_rate": 0.00015913596487553098,
"loss": 3.0595,
"step": 56300
},
{
"epoch": 0.47,
"learning_rate": 0.0001588845970154256,
"loss": 3.0624,
"step": 56400
},
{
"epoch": 0.47,
"learning_rate": 0.00015863322915532016,
"loss": 3.055,
"step": 56500
},
{
"epoch": 0.47,
"learning_rate": 0.00015838186129521478,
"loss": 3.0585,
"step": 56600
},
{
"epoch": 0.47,
"learning_rate": 0.00015813049343510937,
"loss": 3.0555,
"step": 56700
},
{
"epoch": 0.48,
"learning_rate": 0.00015787912557500397,
"loss": 3.0501,
"step": 56800
},
{
"epoch": 0.48,
"learning_rate": 0.00015762775771489856,
"loss": 3.0667,
"step": 56900
},
{
"epoch": 0.48,
"learning_rate": 0.00015737638985479315,
"loss": 3.0681,
"step": 57000
},
{
"epoch": 0.48,
"eval_accuracy": 0.42812924730699675,
"eval_loss": 3.075896739959717,
"eval_runtime": 36.4669,
"eval_samples_per_second": 307.567,
"eval_steps_per_second": 2.578,
"step": 57000
},
{
"epoch": 0.48,
"learning_rate": 0.00015712502199468774,
"loss": 3.0554,
"step": 57100
},
{
"epoch": 0.48,
"learning_rate": 0.00015687365413458236,
"loss": 3.063,
"step": 57200
},
{
"epoch": 0.48,
"learning_rate": 0.00015662228627447692,
"loss": 3.0611,
"step": 57300
},
{
"epoch": 0.48,
"learning_rate": 0.00015637091841437154,
"loss": 3.0647,
"step": 57400
},
{
"epoch": 0.48,
"learning_rate": 0.0001561195505542661,
"loss": 3.0552,
"step": 57500
},
{
"epoch": 0.48,
"learning_rate": 0.00015586818269416072,
"loss": 3.0629,
"step": 57600
},
{
"epoch": 0.48,
"learning_rate": 0.00015561932851265638,
"loss": 3.0619,
"step": 57700
},
{
"epoch": 0.48,
"learning_rate": 0.00015536796065255095,
"loss": 3.0531,
"step": 57800
},
{
"epoch": 0.48,
"learning_rate": 0.00015511659279244557,
"loss": 3.063,
"step": 57900
},
{
"epoch": 0.49,
"learning_rate": 0.00015486522493234013,
"loss": 3.0513,
"step": 58000
},
{
"epoch": 0.49,
"eval_accuracy": 0.42832300875015444,
"eval_loss": 3.0737130641937256,
"eval_runtime": 36.8692,
"eval_samples_per_second": 304.211,
"eval_steps_per_second": 2.55,
"step": 58000
},
{
"epoch": 0.49,
"learning_rate": 0.00015461385707223472,
"loss": 3.0546,
"step": 58100
},
{
"epoch": 0.49,
"learning_rate": 0.0001543624892121293,
"loss": 3.0545,
"step": 58200
},
{
"epoch": 0.49,
"learning_rate": 0.0001541111213520239,
"loss": 3.0543,
"step": 58300
},
{
"epoch": 0.49,
"learning_rate": 0.00015385975349191852,
"loss": 3.0525,
"step": 58400
},
{
"epoch": 0.49,
"learning_rate": 0.0001536083856318131,
"loss": 3.0533,
"step": 58500
},
{
"epoch": 0.49,
"learning_rate": 0.0001533570177717077,
"loss": 3.0616,
"step": 58600
},
{
"epoch": 0.49,
"learning_rate": 0.00015310564991160227,
"loss": 3.0542,
"step": 58700
},
{
"epoch": 0.49,
"learning_rate": 0.0001528542820514969,
"loss": 3.0543,
"step": 58800
},
{
"epoch": 0.49,
"learning_rate": 0.00015260291419139148,
"loss": 3.0603,
"step": 58900
},
{
"epoch": 0.49,
"learning_rate": 0.00015235154633128607,
"loss": 3.0566,
"step": 59000
},
{
"epoch": 0.49,
"eval_accuracy": 0.42880530625540564,
"eval_loss": 3.0726654529571533,
"eval_runtime": 36.7531,
"eval_samples_per_second": 305.171,
"eval_steps_per_second": 2.558,
"step": 59000
},
{
"epoch": 0.49,
"learning_rate": 0.00015210017847118066,
"loss": 3.0475,
"step": 59100
},
{
"epoch": 0.5,
"learning_rate": 0.00015184881061107528,
"loss": 3.0545,
"step": 59200
},
{
"epoch": 0.5,
"learning_rate": 0.00015159744275096984,
"loss": 3.0616,
"step": 59300
},
{
"epoch": 0.5,
"learning_rate": 0.00015134607489086446,
"loss": 3.0503,
"step": 59400
},
{
"epoch": 0.5,
"learning_rate": 0.00015109470703075903,
"loss": 3.0462,
"step": 59500
},
{
"epoch": 0.5,
"learning_rate": 0.00015084333917065365,
"loss": 3.0586,
"step": 59600
},
{
"epoch": 0.5,
"learning_rate": 0.00015059448498914928,
"loss": 3.0502,
"step": 59700
},
{
"epoch": 0.5,
"learning_rate": 0.00015034563080764494,
"loss": 3.0535,
"step": 59800
},
{
"epoch": 0.5,
"learning_rate": 0.0001500942629475395,
"loss": 3.0608,
"step": 59900
},
{
"epoch": 0.5,
"learning_rate": 0.0001498428950874341,
"loss": 3.0552,
"step": 60000
},
{
"epoch": 0.5,
"eval_accuracy": 0.42880671032383433,
"eval_loss": 3.071218967437744,
"eval_runtime": 36.2431,
"eval_samples_per_second": 309.466,
"eval_steps_per_second": 2.594,
"step": 60000
},
{
"epoch": 0.5,
"learning_rate": 0.0001495915272273287,
"loss": 3.0546,
"step": 60100
},
{
"epoch": 0.5,
"learning_rate": 0.0001493401593672233,
"loss": 3.053,
"step": 60200
},
{
"epoch": 0.5,
"learning_rate": 0.0001490887915071179,
"loss": 3.0506,
"step": 60300
},
{
"epoch": 0.51,
"learning_rate": 0.0001488374236470125,
"loss": 3.0562,
"step": 60400
},
{
"epoch": 0.51,
"learning_rate": 0.00014858605578690708,
"loss": 3.0572,
"step": 60500
},
{
"epoch": 0.51,
"learning_rate": 0.00014833468792680167,
"loss": 3.0568,
"step": 60600
},
{
"epoch": 0.51,
"learning_rate": 0.00014808332006669626,
"loss": 3.0571,
"step": 60700
},
{
"epoch": 0.51,
"learning_rate": 0.00014783195220659085,
"loss": 3.0483,
"step": 60800
},
{
"epoch": 0.51,
"learning_rate": 0.00014758058434648544,
"loss": 3.0486,
"step": 60900
},
{
"epoch": 0.51,
"learning_rate": 0.00014732921648638004,
"loss": 3.0457,
"step": 61000
},
{
"epoch": 0.51,
"eval_accuracy": 0.42915000505464634,
"eval_loss": 3.0692341327667236,
"eval_runtime": 36.4055,
"eval_samples_per_second": 308.086,
"eval_steps_per_second": 2.582,
"step": 61000
},
{
"epoch": 0.51,
"learning_rate": 0.00014707784862627463,
"loss": 3.0448,
"step": 61100
},
{
"epoch": 0.51,
"learning_rate": 0.00014682648076616922,
"loss": 3.0498,
"step": 61200
},
{
"epoch": 0.51,
"learning_rate": 0.0001465751129060638,
"loss": 3.0505,
"step": 61300
},
{
"epoch": 0.51,
"learning_rate": 0.0001463237450459584,
"loss": 3.0531,
"step": 61400
},
{
"epoch": 0.51,
"learning_rate": 0.000146072377185853,
"loss": 3.0526,
"step": 61500
},
{
"epoch": 0.52,
"learning_rate": 0.00014582100932574758,
"loss": 3.0585,
"step": 61600
},
{
"epoch": 0.52,
"learning_rate": 0.0001455696414656422,
"loss": 3.0519,
"step": 61700
},
{
"epoch": 0.52,
"learning_rate": 0.0001453182736055368,
"loss": 3.0545,
"step": 61800
},
{
"epoch": 0.52,
"learning_rate": 0.00014506690574543138,
"loss": 3.0521,
"step": 61900
},
{
"epoch": 0.52,
"learning_rate": 0.00014481553788532598,
"loss": 3.0425,
"step": 62000
},
{
"epoch": 0.52,
"eval_accuracy": 0.4291008626596426,
"eval_loss": 3.0679004192352295,
"eval_runtime": 36.1636,
"eval_samples_per_second": 310.146,
"eval_steps_per_second": 2.599,
"step": 62000
},
{
"epoch": 0.52,
"learning_rate": 0.0001445666837038216,
"loss": 3.0616,
"step": 62100
},
{
"epoch": 0.52,
"learning_rate": 0.0001443153158437162,
"loss": 3.057,
"step": 62200
},
{
"epoch": 0.52,
"learning_rate": 0.0001440639479836108,
"loss": 3.052,
"step": 62300
},
{
"epoch": 0.52,
"learning_rate": 0.0001438125801235054,
"loss": 3.0501,
"step": 62400
},
{
"epoch": 0.52,
"learning_rate": 0.0001435612122634,
"loss": 3.0457,
"step": 62500
},
{
"epoch": 0.52,
"learning_rate": 0.0001433098444032946,
"loss": 3.0506,
"step": 62600
},
{
"epoch": 0.52,
"learning_rate": 0.00014305847654318918,
"loss": 3.0478,
"step": 62700
},
{
"epoch": 0.53,
"learning_rate": 0.00014280710868308378,
"loss": 3.0545,
"step": 62800
},
{
"epoch": 0.53,
"learning_rate": 0.00014255574082297834,
"loss": 3.0554,
"step": 62900
},
{
"epoch": 0.53,
"learning_rate": 0.00014230437296287296,
"loss": 3.0573,
"step": 63000
},
{
"epoch": 0.53,
"eval_accuracy": 0.42917527828636254,
"eval_loss": 3.0663866996765137,
"eval_runtime": 36.3952,
"eval_samples_per_second": 308.172,
"eval_steps_per_second": 2.583,
"step": 63000
},
{
"epoch": 0.53,
"learning_rate": 0.00014205300510276755,
"loss": 3.0485,
"step": 63100
},
{
"epoch": 0.53,
"learning_rate": 0.00014180163724266214,
"loss": 3.0476,
"step": 63200
},
{
"epoch": 0.53,
"learning_rate": 0.00014155026938255673,
"loss": 3.0442,
"step": 63300
},
{
"epoch": 0.53,
"learning_rate": 0.00014129890152245132,
"loss": 3.0486,
"step": 63400
},
{
"epoch": 0.53,
"learning_rate": 0.00014104753366234592,
"loss": 3.0384,
"step": 63500
},
{
"epoch": 0.53,
"learning_rate": 0.0001407961658022405,
"loss": 3.0539,
"step": 63600
},
{
"epoch": 0.53,
"learning_rate": 0.0001405447979421351,
"loss": 3.0429,
"step": 63700
},
{
"epoch": 0.53,
"learning_rate": 0.0001402934300820297,
"loss": 3.0444,
"step": 63800
},
{
"epoch": 0.53,
"learning_rate": 0.00014004457590052535,
"loss": 3.0489,
"step": 63900
},
{
"epoch": 0.54,
"learning_rate": 0.00013979320804041994,
"loss": 3.0555,
"step": 64000
},
{
"epoch": 0.54,
"eval_accuracy": 0.42978183584755186,
"eval_loss": 3.0650320053100586,
"eval_runtime": 37.0145,
"eval_samples_per_second": 303.016,
"eval_steps_per_second": 2.54,
"step": 64000
},
{
"epoch": 0.54,
"learning_rate": 0.00013954184018031453,
"loss": 3.0507,
"step": 64100
},
{
"epoch": 0.54,
"learning_rate": 0.00013929047232020912,
"loss": 3.0453,
"step": 64200
},
{
"epoch": 0.54,
"learning_rate": 0.00013903910446010372,
"loss": 3.0495,
"step": 64300
},
{
"epoch": 0.54,
"learning_rate": 0.0001387877365999983,
"loss": 3.0446,
"step": 64400
},
{
"epoch": 0.54,
"learning_rate": 0.0001385363687398929,
"loss": 3.0488,
"step": 64500
},
{
"epoch": 0.54,
"learning_rate": 0.00013828500087978752,
"loss": 3.0498,
"step": 64600
},
{
"epoch": 0.54,
"learning_rate": 0.0001380336330196821,
"loss": 3.0441,
"step": 64700
},
{
"epoch": 0.54,
"learning_rate": 0.00013778226515957667,
"loss": 3.0435,
"step": 64800
},
{
"epoch": 0.54,
"learning_rate": 0.00013753089729947126,
"loss": 3.0517,
"step": 64900
},
{
"epoch": 0.54,
"learning_rate": 0.00013727952943936585,
"loss": 3.0421,
"step": 65000
},
{
"epoch": 0.54,
"eval_accuracy": 0.4294195861929527,
"eval_loss": 3.0636541843414307,
"eval_runtime": 36.752,
"eval_samples_per_second": 305.181,
"eval_steps_per_second": 2.558,
"step": 65000
},
{
"epoch": 0.54,
"learning_rate": 0.00013702816157926045,
"loss": 3.0412,
"step": 65100
},
{
"epoch": 0.55,
"learning_rate": 0.00013677679371915506,
"loss": 3.0548,
"step": 65200
},
{
"epoch": 0.55,
"learning_rate": 0.00013652542585904966,
"loss": 3.0409,
"step": 65300
},
{
"epoch": 0.55,
"learning_rate": 0.00013627405799894425,
"loss": 3.0377,
"step": 65400
},
{
"epoch": 0.55,
"learning_rate": 0.00013602269013883884,
"loss": 3.0429,
"step": 65500
},
{
"epoch": 0.55,
"learning_rate": 0.00013577383595733447,
"loss": 3.0467,
"step": 65600
},
{
"epoch": 0.55,
"learning_rate": 0.00013552498177583013,
"loss": 3.0496,
"step": 65700
},
{
"epoch": 0.55,
"learning_rate": 0.00013527361391572472,
"loss": 3.0424,
"step": 65800
},
{
"epoch": 0.55,
"learning_rate": 0.00013502224605561932,
"loss": 3.043,
"step": 65900
},
{
"epoch": 0.55,
"learning_rate": 0.0001347708781955139,
"loss": 3.0496,
"step": 66000
},
{
"epoch": 0.55,
"eval_accuracy": 0.42957333168589307,
"eval_loss": 3.062688112258911,
"eval_runtime": 36.3303,
"eval_samples_per_second": 308.723,
"eval_steps_per_second": 2.587,
"step": 66000
},
{
"epoch": 0.55,
"learning_rate": 0.0001345195103354085,
"loss": 3.0434,
"step": 66100
},
{
"epoch": 0.55,
"learning_rate": 0.0001342681424753031,
"loss": 3.0392,
"step": 66200
},
{
"epoch": 0.55,
"learning_rate": 0.00013401677461519768,
"loss": 3.041,
"step": 66300
},
{
"epoch": 0.56,
"learning_rate": 0.00013376540675509227,
"loss": 3.0526,
"step": 66400
},
{
"epoch": 0.56,
"learning_rate": 0.0001335140388949869,
"loss": 3.046,
"step": 66500
},
{
"epoch": 0.56,
"learning_rate": 0.00013326267103488148,
"loss": 3.0398,
"step": 66600
},
{
"epoch": 0.56,
"learning_rate": 0.00013301130317477607,
"loss": 3.0473,
"step": 66700
},
{
"epoch": 0.56,
"learning_rate": 0.00013275993531467066,
"loss": 3.0368,
"step": 66800
},
{
"epoch": 0.56,
"learning_rate": 0.00013250856745456523,
"loss": 3.0427,
"step": 66900
},
{
"epoch": 0.56,
"learning_rate": 0.00013225719959445985,
"loss": 3.0415,
"step": 67000
},
{
"epoch": 0.56,
"eval_accuracy": 0.4300071888303548,
"eval_loss": 3.060805320739746,
"eval_runtime": 37.0174,
"eval_samples_per_second": 302.993,
"eval_steps_per_second": 2.539,
"step": 67000
},
{
"epoch": 0.56,
"learning_rate": 0.00013200583173435444,
"loss": 3.0429,
"step": 67100
},
{
"epoch": 0.56,
"learning_rate": 0.00013175446387424903,
"loss": 3.0494,
"step": 67200
},
{
"epoch": 0.56,
"learning_rate": 0.00013150309601414362,
"loss": 3.0384,
"step": 67300
},
{
"epoch": 0.56,
"learning_rate": 0.0001312517281540382,
"loss": 3.0438,
"step": 67400
},
{
"epoch": 0.56,
"learning_rate": 0.0001310003602939328,
"loss": 3.0427,
"step": 67500
},
{
"epoch": 0.57,
"learning_rate": 0.0001307489924338274,
"loss": 3.0447,
"step": 67600
},
{
"epoch": 0.57,
"learning_rate": 0.00013050013825232306,
"loss": 3.0438,
"step": 67700
},
{
"epoch": 0.57,
"learning_rate": 0.00013024877039221765,
"loss": 3.0403,
"step": 67800
},
{
"epoch": 0.57,
"learning_rate": 0.00012999740253211224,
"loss": 3.0478,
"step": 67900
},
{
"epoch": 0.57,
"learning_rate": 0.00012974603467200683,
"loss": 3.0412,
"step": 68000
},
{
"epoch": 0.57,
"eval_accuracy": 0.4298436148584137,
"eval_loss": 3.0598626136779785,
"eval_runtime": 36.2351,
"eval_samples_per_second": 309.534,
"eval_steps_per_second": 2.594,
"step": 68000
},
{
"epoch": 0.57,
"learning_rate": 0.00012949466681190142,
"loss": 3.0411,
"step": 68100
},
{
"epoch": 0.57,
"learning_rate": 0.000129243298951796,
"loss": 3.035,
"step": 68200
},
{
"epoch": 0.57,
"learning_rate": 0.0001289919310916906,
"loss": 3.0464,
"step": 68300
},
{
"epoch": 0.57,
"learning_rate": 0.0001287405632315852,
"loss": 3.0369,
"step": 68400
},
{
"epoch": 0.57,
"learning_rate": 0.00012848919537147979,
"loss": 3.0428,
"step": 68500
},
{
"epoch": 0.57,
"learning_rate": 0.0001282378275113744,
"loss": 3.0436,
"step": 68600
},
{
"epoch": 0.57,
"learning_rate": 0.000127986459651269,
"loss": 3.0454,
"step": 68700
},
{
"epoch": 0.58,
"learning_rate": 0.00012773509179116356,
"loss": 3.0361,
"step": 68800
},
{
"epoch": 0.58,
"learning_rate": 0.00012748372393105815,
"loss": 3.0437,
"step": 68900
},
{
"epoch": 0.58,
"learning_rate": 0.00012723235607095274,
"loss": 3.0373,
"step": 69000
},
{
"epoch": 0.58,
"eval_accuracy": 0.4302465824974446,
"eval_loss": 3.057598829269409,
"eval_runtime": 36.2031,
"eval_samples_per_second": 309.808,
"eval_steps_per_second": 2.596,
"step": 69000
},
{
"epoch": 0.58,
"learning_rate": 0.0001269835018894484,
"loss": 3.0426,
"step": 69100
},
{
"epoch": 0.58,
"learning_rate": 0.000126732134029343,
"loss": 3.041,
"step": 69200
},
{
"epoch": 0.58,
"learning_rate": 0.0001264807661692376,
"loss": 3.036,
"step": 69300
},
{
"epoch": 0.58,
"learning_rate": 0.00012622939830913218,
"loss": 3.0396,
"step": 69400
},
{
"epoch": 0.58,
"learning_rate": 0.00012597803044902677,
"loss": 3.0418,
"step": 69500
},
{
"epoch": 0.58,
"learning_rate": 0.00012572666258892136,
"loss": 3.0335,
"step": 69600
},
{
"epoch": 0.58,
"learning_rate": 0.00012547529472881595,
"loss": 3.0334,
"step": 69700
},
{
"epoch": 0.58,
"learning_rate": 0.00012522392686871054,
"loss": 3.0381,
"step": 69800
},
{
"epoch": 0.58,
"learning_rate": 0.00012497255900860516,
"loss": 3.0393,
"step": 69900
},
{
"epoch": 0.59,
"learning_rate": 0.00012472119114849975,
"loss": 3.0393,
"step": 70000
},
{
"epoch": 0.59,
"eval_accuracy": 0.43052950228582343,
"eval_loss": 3.05704665184021,
"eval_runtime": 36.208,
"eval_samples_per_second": 309.765,
"eval_steps_per_second": 2.596,
"step": 70000
},
{
"epoch": 0.59,
"learning_rate": 0.00012446982328839434,
"loss": 3.0383,
"step": 70100
},
{
"epoch": 0.59,
"learning_rate": 0.00012421845542828894,
"loss": 3.0441,
"step": 70200
},
{
"epoch": 0.59,
"learning_rate": 0.00012396708756818353,
"loss": 3.0388,
"step": 70300
},
{
"epoch": 0.59,
"learning_rate": 0.00012371823338667916,
"loss": 3.0403,
"step": 70400
},
{
"epoch": 0.59,
"learning_rate": 0.00012346686552657378,
"loss": 3.0368,
"step": 70500
},
{
"epoch": 0.59,
"learning_rate": 0.00012321549766646837,
"loss": 3.0405,
"step": 70600
},
{
"epoch": 0.59,
"learning_rate": 0.00012296412980636296,
"loss": 3.0351,
"step": 70700
},
{
"epoch": 0.59,
"learning_rate": 0.0001227152756248586,
"loss": 3.0355,
"step": 70800
},
{
"epoch": 0.59,
"learning_rate": 0.0001224639077647532,
"loss": 3.038,
"step": 70900
},
{
"epoch": 0.59,
"learning_rate": 0.00012221253990464778,
"loss": 3.0312,
"step": 71000
},
{
"epoch": 0.59,
"eval_accuracy": 0.43072256169476675,
"eval_loss": 3.056051254272461,
"eval_runtime": 35.9605,
"eval_samples_per_second": 311.897,
"eval_steps_per_second": 2.614,
"step": 71000
},
{
"epoch": 0.59,
"learning_rate": 0.00012196117204454238,
"loss": 3.0336,
"step": 71100
},
{
"epoch": 0.6,
"learning_rate": 0.00012170980418443696,
"loss": 3.0371,
"step": 71200
},
{
"epoch": 0.6,
"learning_rate": 0.00012145843632433155,
"loss": 3.0415,
"step": 71300
},
{
"epoch": 0.6,
"learning_rate": 0.00012120706846422614,
"loss": 3.033,
"step": 71400
},
{
"epoch": 0.6,
"learning_rate": 0.00012095570060412075,
"loss": 3.0401,
"step": 71500
},
{
"epoch": 0.6,
"learning_rate": 0.00012070433274401534,
"loss": 3.0407,
"step": 71600
},
{
"epoch": 0.6,
"learning_rate": 0.00012045296488390993,
"loss": 3.0389,
"step": 71700
},
{
"epoch": 0.6,
"learning_rate": 0.00012020159702380452,
"loss": 3.0326,
"step": 71800
},
{
"epoch": 0.6,
"learning_rate": 0.00011995022916369911,
"loss": 3.0343,
"step": 71900
},
{
"epoch": 0.6,
"learning_rate": 0.00011969886130359372,
"loss": 3.0397,
"step": 72000
},
{
"epoch": 0.6,
"eval_accuracy": 0.43072607186583844,
"eval_loss": 3.0532803535461426,
"eval_runtime": 36.5519,
"eval_samples_per_second": 306.851,
"eval_steps_per_second": 2.572,
"step": 72000
},
{
"epoch": 0.6,
"learning_rate": 0.00011945000712208935,
"loss": 3.041,
"step": 72100
},
{
"epoch": 0.6,
"learning_rate": 0.00011919863926198396,
"loss": 3.0375,
"step": 72200
},
{
"epoch": 0.6,
"learning_rate": 0.00011894727140187855,
"loss": 3.03,
"step": 72300
},
{
"epoch": 0.61,
"learning_rate": 0.00011869590354177314,
"loss": 3.0314,
"step": 72400
},
{
"epoch": 0.61,
"learning_rate": 0.00011844453568166773,
"loss": 3.0399,
"step": 72500
},
{
"epoch": 0.61,
"learning_rate": 0.00011819316782156232,
"loss": 3.043,
"step": 72600
},
{
"epoch": 0.61,
"learning_rate": 0.00011794179996145693,
"loss": 3.0324,
"step": 72700
},
{
"epoch": 0.61,
"learning_rate": 0.00011769043210135152,
"loss": 3.037,
"step": 72800
},
{
"epoch": 0.61,
"learning_rate": 0.00011743906424124611,
"loss": 3.0391,
"step": 72900
},
{
"epoch": 0.61,
"learning_rate": 0.00011718769638114069,
"loss": 3.0303,
"step": 73000
},
{
"epoch": 0.61,
"eval_accuracy": 0.43111219068372514,
"eval_loss": 3.0526981353759766,
"eval_runtime": 36.4926,
"eval_samples_per_second": 307.35,
"eval_steps_per_second": 2.576,
"step": 73000
},
{
"epoch": 0.61,
"learning_rate": 0.00011693632852103528,
"loss": 3.0329,
"step": 73100
},
{
"epoch": 0.61,
"learning_rate": 0.00011668496066092987,
"loss": 3.0346,
"step": 73200
},
{
"epoch": 0.61,
"learning_rate": 0.00011643359280082448,
"loss": 3.0405,
"step": 73300
},
{
"epoch": 0.61,
"learning_rate": 0.00011618222494071907,
"loss": 3.0344,
"step": 73400
},
{
"epoch": 0.61,
"learning_rate": 0.00011593085708061366,
"loss": 3.0389,
"step": 73500
},
{
"epoch": 0.62,
"learning_rate": 0.00011567948922050825,
"loss": 3.0361,
"step": 73600
},
{
"epoch": 0.62,
"learning_rate": 0.00011542812136040285,
"loss": 3.0329,
"step": 73700
},
{
"epoch": 0.62,
"learning_rate": 0.00011517675350029745,
"loss": 3.0304,
"step": 73800
},
{
"epoch": 0.62,
"learning_rate": 0.00011492538564019204,
"loss": 3.0316,
"step": 73900
},
{
"epoch": 0.62,
"learning_rate": 0.00011467401778008663,
"loss": 3.0403,
"step": 74000
},
{
"epoch": 0.62,
"eval_accuracy": 0.43146250575668055,
"eval_loss": 3.0502421855926514,
"eval_runtime": 36.2647,
"eval_samples_per_second": 309.281,
"eval_steps_per_second": 2.592,
"step": 74000
},
{
"epoch": 0.62,
"learning_rate": 0.00011442516359858228,
"loss": 3.0443,
"step": 74100
},
{
"epoch": 0.62,
"learning_rate": 0.00011417379573847687,
"loss": 3.0376,
"step": 74200
},
{
"epoch": 0.62,
"learning_rate": 0.00011392242787837146,
"loss": 3.0313,
"step": 74300
},
{
"epoch": 0.62,
"learning_rate": 0.00011367106001826606,
"loss": 3.0429,
"step": 74400
},
{
"epoch": 0.62,
"learning_rate": 0.00011341969215816065,
"loss": 3.0342,
"step": 74500
},
{
"epoch": 0.62,
"learning_rate": 0.00011316832429805525,
"loss": 3.0335,
"step": 74600
},
{
"epoch": 0.62,
"learning_rate": 0.00011291695643794984,
"loss": 3.0375,
"step": 74700
},
{
"epoch": 0.63,
"learning_rate": 0.00011266558857784443,
"loss": 3.0247,
"step": 74800
},
{
"epoch": 0.63,
"learning_rate": 0.000112414220717739,
"loss": 3.0309,
"step": 74900
},
{
"epoch": 0.63,
"learning_rate": 0.00011216285285763361,
"loss": 3.0326,
"step": 75000
},
{
"epoch": 0.63,
"eval_accuracy": 0.43156359868354544,
"eval_loss": 3.049257278442383,
"eval_runtime": 36.2389,
"eval_samples_per_second": 309.501,
"eval_steps_per_second": 2.594,
"step": 75000
},
{
"epoch": 0.63,
"learning_rate": 0.0001119114849975282,
"loss": 3.0389,
"step": 75100
},
{
"epoch": 0.63,
"learning_rate": 0.00011166011713742279,
"loss": 3.0309,
"step": 75200
},
{
"epoch": 0.63,
"learning_rate": 0.00011141126295591844,
"loss": 3.0375,
"step": 75300
},
{
"epoch": 0.63,
"learning_rate": 0.00011115989509581303,
"loss": 3.0351,
"step": 75400
},
{
"epoch": 0.63,
"learning_rate": 0.00011090852723570762,
"loss": 3.0324,
"step": 75500
},
{
"epoch": 0.63,
"learning_rate": 0.00011065715937560223,
"loss": 3.0369,
"step": 75600
},
{
"epoch": 0.63,
"learning_rate": 0.00011040579151549682,
"loss": 3.0289,
"step": 75700
},
{
"epoch": 0.63,
"learning_rate": 0.00011015442365539141,
"loss": 3.0346,
"step": 75800
},
{
"epoch": 0.63,
"learning_rate": 0.000109903055795286,
"loss": 3.0234,
"step": 75900
},
{
"epoch": 0.64,
"learning_rate": 0.0001096516879351806,
"loss": 3.0322,
"step": 76000
},
{
"epoch": 0.64,
"eval_accuracy": 0.4314962033989688,
"eval_loss": 3.0480940341949463,
"eval_runtime": 35.8603,
"eval_samples_per_second": 312.77,
"eval_steps_per_second": 2.621,
"step": 76000
},
{
"epoch": 0.64,
"learning_rate": 0.0001094003200750752,
"loss": 3.027,
"step": 76100
},
{
"epoch": 0.64,
"learning_rate": 0.00010914895221496979,
"loss": 3.03,
"step": 76200
},
{
"epoch": 0.64,
"learning_rate": 0.00010890009803346544,
"loss": 3.0236,
"step": 76300
},
{
"epoch": 0.64,
"learning_rate": 0.00010864873017336003,
"loss": 3.0343,
"step": 76400
},
{
"epoch": 0.64,
"learning_rate": 0.00010839736231325462,
"loss": 3.0335,
"step": 76500
},
{
"epoch": 0.64,
"learning_rate": 0.00010814599445314921,
"loss": 3.0263,
"step": 76600
},
{
"epoch": 0.64,
"learning_rate": 0.00010789462659304382,
"loss": 3.0269,
"step": 76700
},
{
"epoch": 0.64,
"learning_rate": 0.00010764325873293841,
"loss": 3.0391,
"step": 76800
},
{
"epoch": 0.64,
"learning_rate": 0.000107391890872833,
"loss": 3.0361,
"step": 76900
},
{
"epoch": 0.64,
"learning_rate": 0.00010714052301272759,
"loss": 3.0265,
"step": 77000
},
{
"epoch": 0.64,
"eval_accuracy": 0.4318640693272827,
"eval_loss": 3.0469460487365723,
"eval_runtime": 37.1071,
"eval_samples_per_second": 302.26,
"eval_steps_per_second": 2.533,
"step": 77000
},
{
"epoch": 0.64,
"learning_rate": 0.00010688915515262217,
"loss": 3.0313,
"step": 77100
},
{
"epoch": 0.65,
"learning_rate": 0.00010663778729251676,
"loss": 3.0319,
"step": 77200
},
{
"epoch": 0.65,
"learning_rate": 0.00010638641943241136,
"loss": 3.0247,
"step": 77300
},
{
"epoch": 0.65,
"learning_rate": 0.00010613505157230595,
"loss": 3.0264,
"step": 77400
},
{
"epoch": 0.65,
"learning_rate": 0.0001058861973908016,
"loss": 3.0262,
"step": 77500
},
{
"epoch": 0.65,
"learning_rate": 0.0001056348295306962,
"loss": 3.0327,
"step": 77600
},
{
"epoch": 0.65,
"learning_rate": 0.00010538346167059079,
"loss": 3.0318,
"step": 77700
},
{
"epoch": 0.65,
"learning_rate": 0.00010513209381048538,
"loss": 3.0356,
"step": 77800
},
{
"epoch": 0.65,
"learning_rate": 0.00010488072595037997,
"loss": 3.0374,
"step": 77900
},
{
"epoch": 0.65,
"learning_rate": 0.00010462935809027457,
"loss": 3.0231,
"step": 78000
},
{
"epoch": 0.65,
"eval_accuracy": 0.43201430464915136,
"eval_loss": 3.045305013656616,
"eval_runtime": 37.1474,
"eval_samples_per_second": 301.933,
"eval_steps_per_second": 2.53,
"step": 78000
},
{
"epoch": 0.65,
"learning_rate": 0.00010437799023016916,
"loss": 3.0296,
"step": 78100
},
{
"epoch": 0.65,
"learning_rate": 0.00010412662237006376,
"loss": 3.025,
"step": 78200
},
{
"epoch": 0.65,
"learning_rate": 0.00010387525450995835,
"loss": 3.0329,
"step": 78300
},
{
"epoch": 0.66,
"learning_rate": 0.00010362388664985294,
"loss": 3.0268,
"step": 78400
},
{
"epoch": 0.66,
"learning_rate": 0.00010337251878974754,
"loss": 3.0259,
"step": 78500
},
{
"epoch": 0.66,
"learning_rate": 0.00010312115092964213,
"loss": 3.0298,
"step": 78600
},
{
"epoch": 0.66,
"learning_rate": 0.00010286978306953673,
"loss": 3.0296,
"step": 78700
},
{
"epoch": 0.66,
"learning_rate": 0.00010261841520943132,
"loss": 3.0291,
"step": 78800
},
{
"epoch": 0.66,
"learning_rate": 0.0001023670473493259,
"loss": 3.0371,
"step": 78900
},
{
"epoch": 0.66,
"learning_rate": 0.0001021156794892205,
"loss": 3.0259,
"step": 79000
},
{
"epoch": 0.66,
"eval_accuracy": 0.43211188740494455,
"eval_loss": 3.044191837310791,
"eval_runtime": 37.3457,
"eval_samples_per_second": 300.329,
"eval_steps_per_second": 2.517,
"step": 79000
},
{
"epoch": 0.66,
"learning_rate": 0.00010186431162911509,
"loss": 3.0266,
"step": 79100
},
{
"epoch": 0.66,
"learning_rate": 0.00010161294376900968,
"loss": 3.0272,
"step": 79200
},
{
"epoch": 0.66,
"learning_rate": 0.00010136157590890427,
"loss": 3.0191,
"step": 79300
},
{
"epoch": 0.66,
"learning_rate": 0.00010111020804879886,
"loss": 3.0178,
"step": 79400
},
{
"epoch": 0.67,
"learning_rate": 0.00010085884018869347,
"loss": 3.0178,
"step": 79500
},
{
"epoch": 0.67,
"learning_rate": 0.0001006099860071891,
"loss": 3.0264,
"step": 79600
},
{
"epoch": 0.67,
"learning_rate": 0.00010036113182568475,
"loss": 3.0172,
"step": 79700
},
{
"epoch": 0.67,
"learning_rate": 0.00010010976396557934,
"loss": 3.0276,
"step": 79800
},
{
"epoch": 0.67,
"learning_rate": 9.985839610547395e-05,
"loss": 3.0254,
"step": 79900
},
{
"epoch": 0.67,
"learning_rate": 9.960702824536854e-05,
"loss": 3.0219,
"step": 80000
},
{
"epoch": 0.67,
"eval_accuracy": 0.43250292046233163,
"eval_loss": 3.0422935485839844,
"eval_runtime": 37.0202,
"eval_samples_per_second": 302.97,
"eval_steps_per_second": 2.539,
"step": 80000
},
{
"epoch": 0.67,
"learning_rate": 9.935566038526313e-05,
"loss": 3.0265,
"step": 80100
},
{
"epoch": 0.67,
"learning_rate": 9.910429252515772e-05,
"loss": 3.025,
"step": 80200
},
{
"epoch": 0.67,
"learning_rate": 9.885292466505231e-05,
"loss": 3.0164,
"step": 80300
},
{
"epoch": 0.67,
"learning_rate": 9.860155680494692e-05,
"loss": 3.0307,
"step": 80400
},
{
"epoch": 0.67,
"learning_rate": 9.835018894484151e-05,
"loss": 3.0268,
"step": 80500
},
{
"epoch": 0.67,
"learning_rate": 9.80988210847361e-05,
"loss": 3.0261,
"step": 80600
},
{
"epoch": 0.68,
"learning_rate": 9.784745322463069e-05,
"loss": 3.0213,
"step": 80700
},
{
"epoch": 0.68,
"learning_rate": 9.75960853645253e-05,
"loss": 3.0222,
"step": 80800
},
{
"epoch": 0.68,
"learning_rate": 9.734471750441989e-05,
"loss": 3.0249,
"step": 80900
},
{
"epoch": 0.68,
"learning_rate": 9.709334964431448e-05,
"loss": 3.0233,
"step": 81000
},
{
"epoch": 0.68,
"eval_accuracy": 0.4324165702539679,
"eval_loss": 3.0414962768554688,
"eval_runtime": 37.0887,
"eval_samples_per_second": 302.41,
"eval_steps_per_second": 2.534,
"step": 81000
},
{
"epoch": 0.68,
"learning_rate": 9.684198178420906e-05,
"loss": 3.0177,
"step": 81100
},
{
"epoch": 0.68,
"learning_rate": 9.659061392410365e-05,
"loss": 3.0309,
"step": 81200
},
{
"epoch": 0.68,
"learning_rate": 9.633924606399824e-05,
"loss": 3.0245,
"step": 81300
},
{
"epoch": 0.68,
"learning_rate": 9.608787820389284e-05,
"loss": 3.0287,
"step": 81400
},
{
"epoch": 0.68,
"learning_rate": 9.583651034378743e-05,
"loss": 3.0152,
"step": 81500
},
{
"epoch": 0.68,
"learning_rate": 9.558514248368203e-05,
"loss": 3.0204,
"step": 81600
},
{
"epoch": 0.68,
"learning_rate": 9.533377462357662e-05,
"loss": 3.0258,
"step": 81700
},
{
"epoch": 0.68,
"learning_rate": 9.508240676347121e-05,
"loss": 3.0255,
"step": 81800
},
{
"epoch": 0.69,
"learning_rate": 9.483103890336581e-05,
"loss": 3.0245,
"step": 81900
},
{
"epoch": 0.69,
"learning_rate": 9.45796710432604e-05,
"loss": 3.0261,
"step": 82000
},
{
"epoch": 0.69,
"eval_accuracy": 0.43273810192413537,
"eval_loss": 3.040773868560791,
"eval_runtime": 36.3004,
"eval_samples_per_second": 308.977,
"eval_steps_per_second": 2.59,
"step": 82000
},
{
"epoch": 0.69,
"learning_rate": 9.433081686175605e-05,
"loss": 3.0236,
"step": 82100
},
{
"epoch": 0.69,
"learning_rate": 9.407944900165064e-05,
"loss": 3.0339,
"step": 82200
},
{
"epoch": 0.69,
"learning_rate": 9.382808114154523e-05,
"loss": 3.021,
"step": 82300
},
{
"epoch": 0.69,
"learning_rate": 9.357671328143983e-05,
"loss": 3.0208,
"step": 82400
},
{
"epoch": 0.69,
"learning_rate": 9.332534542133443e-05,
"loss": 3.0175,
"step": 82500
},
{
"epoch": 0.69,
"learning_rate": 9.307397756122902e-05,
"loss": 3.0294,
"step": 82600
},
{
"epoch": 0.69,
"learning_rate": 9.282260970112361e-05,
"loss": 3.0258,
"step": 82700
},
{
"epoch": 0.69,
"learning_rate": 9.25712418410182e-05,
"loss": 3.0144,
"step": 82800
},
{
"epoch": 0.69,
"learning_rate": 9.231987398091278e-05,
"loss": 3.016,
"step": 82900
},
{
"epoch": 0.69,
"learning_rate": 9.206850612080737e-05,
"loss": 3.0221,
"step": 83000
},
{
"epoch": 0.69,
"eval_accuracy": 0.43296696507801,
"eval_loss": 3.038726806640625,
"eval_runtime": 36.1807,
"eval_samples_per_second": 309.999,
"eval_steps_per_second": 2.598,
"step": 83000
},
{
"epoch": 0.7,
"learning_rate": 9.181965193930304e-05,
"loss": 3.0217,
"step": 83100
},
{
"epoch": 0.7,
"learning_rate": 9.156828407919761e-05,
"loss": 3.0149,
"step": 83200
},
{
"epoch": 0.7,
"learning_rate": 9.131691621909222e-05,
"loss": 3.0247,
"step": 83300
},
{
"epoch": 0.7,
"learning_rate": 9.106554835898681e-05,
"loss": 3.021,
"step": 83400
},
{
"epoch": 0.7,
"learning_rate": 9.081669417748246e-05,
"loss": 3.0239,
"step": 83500
},
{
"epoch": 0.7,
"learning_rate": 9.056532631737705e-05,
"loss": 3.0349,
"step": 83600
},
{
"epoch": 0.7,
"learning_rate": 9.031395845727164e-05,
"loss": 3.026,
"step": 83700
},
{
"epoch": 0.7,
"learning_rate": 9.006259059716623e-05,
"loss": 3.0178,
"step": 83800
},
{
"epoch": 0.7,
"learning_rate": 8.981122273706082e-05,
"loss": 3.0249,
"step": 83900
},
{
"epoch": 0.7,
"learning_rate": 8.955985487695543e-05,
"loss": 3.0296,
"step": 84000
},
{
"epoch": 0.7,
"eval_accuracy": 0.43312211463937905,
"eval_loss": 3.0376861095428467,
"eval_runtime": 38.9475,
"eval_samples_per_second": 287.978,
"eval_steps_per_second": 2.414,
"step": 84000
},
{
"epoch": 0.7,
"learning_rate": 8.930848701685002e-05,
"loss": 3.0205,
"step": 84100
},
{
"epoch": 0.7,
"learning_rate": 8.905711915674461e-05,
"loss": 3.0214,
"step": 84200
},
{
"epoch": 0.71,
"learning_rate": 8.88057512966392e-05,
"loss": 3.0283,
"step": 84300
},
{
"epoch": 0.71,
"learning_rate": 8.85543834365338e-05,
"loss": 3.0163,
"step": 84400
},
{
"epoch": 0.71,
"learning_rate": 8.83030155764284e-05,
"loss": 3.02,
"step": 84500
},
{
"epoch": 0.71,
"learning_rate": 8.805164771632299e-05,
"loss": 3.0189,
"step": 84600
},
{
"epoch": 0.71,
"learning_rate": 8.780027985621758e-05,
"loss": 3.0167,
"step": 84700
},
{
"epoch": 0.71,
"learning_rate": 8.754891199611217e-05,
"loss": 3.0177,
"step": 84800
},
{
"epoch": 0.71,
"learning_rate": 8.729754413600678e-05,
"loss": 3.0226,
"step": 84900
},
{
"epoch": 0.71,
"learning_rate": 8.704617627590137e-05,
"loss": 3.0186,
"step": 85000
},
{
"epoch": 0.71,
"eval_accuracy": 0.4335391229626967,
"eval_loss": 3.03602933883667,
"eval_runtime": 36.1657,
"eval_samples_per_second": 310.128,
"eval_steps_per_second": 2.599,
"step": 85000
},
{
"epoch": 0.71,
"learning_rate": 8.679480841579594e-05,
"loss": 3.0144,
"step": 85100
},
{
"epoch": 0.71,
"learning_rate": 8.65459542342916e-05,
"loss": 3.0128,
"step": 85200
},
{
"epoch": 0.71,
"learning_rate": 8.629458637418618e-05,
"loss": 3.0189,
"step": 85300
},
{
"epoch": 0.71,
"learning_rate": 8.604321851408077e-05,
"loss": 3.0231,
"step": 85400
},
{
"epoch": 0.72,
"learning_rate": 8.579185065397537e-05,
"loss": 3.0161,
"step": 85500
},
{
"epoch": 0.72,
"learning_rate": 8.554048279386996e-05,
"loss": 3.0188,
"step": 85600
},
{
"epoch": 0.72,
"learning_rate": 8.528911493376456e-05,
"loss": 3.027,
"step": 85700
},
{
"epoch": 0.72,
"learning_rate": 8.503774707365915e-05,
"loss": 3.017,
"step": 85800
},
{
"epoch": 0.72,
"learning_rate": 8.478637921355374e-05,
"loss": 3.0173,
"step": 85900
},
{
"epoch": 0.72,
"learning_rate": 8.453501135344834e-05,
"loss": 3.0151,
"step": 86000
},
{
"epoch": 0.72,
"eval_accuracy": 0.43330745167196466,
"eval_loss": 3.034996747970581,
"eval_runtime": 36.1826,
"eval_samples_per_second": 309.983,
"eval_steps_per_second": 2.598,
"step": 86000
},
{
"epoch": 0.72,
"learning_rate": 8.428364349334294e-05,
"loss": 3.0227,
"step": 86100
},
{
"epoch": 0.72,
"learning_rate": 8.403227563323753e-05,
"loss": 3.0163,
"step": 86200
},
{
"epoch": 0.72,
"learning_rate": 8.378090777313212e-05,
"loss": 3.0096,
"step": 86300
},
{
"epoch": 0.72,
"learning_rate": 8.352953991302671e-05,
"loss": 3.0147,
"step": 86400
},
{
"epoch": 0.72,
"learning_rate": 8.32781720529213e-05,
"loss": 3.0051,
"step": 86500
},
{
"epoch": 0.72,
"learning_rate": 8.302680419281591e-05,
"loss": 3.0201,
"step": 86600
},
{
"epoch": 0.73,
"learning_rate": 8.277795001131154e-05,
"loss": 3.0169,
"step": 86700
},
{
"epoch": 0.73,
"learning_rate": 8.252658215120615e-05,
"loss": 3.008,
"step": 86800
},
{
"epoch": 0.73,
"learning_rate": 8.227521429110074e-05,
"loss": 3.0117,
"step": 86900
},
{
"epoch": 0.73,
"learning_rate": 8.202384643099533e-05,
"loss": 3.0121,
"step": 87000
},
{
"epoch": 0.73,
"eval_accuracy": 0.43354333516798277,
"eval_loss": 3.033334493637085,
"eval_runtime": 37.3178,
"eval_samples_per_second": 300.553,
"eval_steps_per_second": 2.519,
"step": 87000
},
{
"epoch": 0.73,
"learning_rate": 8.177247857088992e-05,
"loss": 3.014,
"step": 87100
},
{
"epoch": 0.73,
"learning_rate": 8.15211107107845e-05,
"loss": 3.0168,
"step": 87200
},
{
"epoch": 0.73,
"learning_rate": 8.126974285067909e-05,
"loss": 3.0092,
"step": 87300
},
{
"epoch": 0.73,
"learning_rate": 8.10183749905737e-05,
"loss": 3.0231,
"step": 87400
},
{
"epoch": 0.73,
"learning_rate": 8.076700713046829e-05,
"loss": 3.0133,
"step": 87500
},
{
"epoch": 0.73,
"learning_rate": 8.051563927036288e-05,
"loss": 3.0135,
"step": 87600
},
{
"epoch": 0.73,
"learning_rate": 8.026427141025747e-05,
"loss": 3.0188,
"step": 87700
},
{
"epoch": 0.73,
"learning_rate": 8.001290355015206e-05,
"loss": 3.0151,
"step": 87800
},
{
"epoch": 0.74,
"learning_rate": 7.976153569004667e-05,
"loss": 3.0211,
"step": 87900
},
{
"epoch": 0.74,
"learning_rate": 7.951016782994126e-05,
"loss": 3.0142,
"step": 88000
},
{
"epoch": 0.74,
"eval_accuracy": 0.4337988756220023,
"eval_loss": 3.032519817352295,
"eval_runtime": 37.5602,
"eval_samples_per_second": 298.614,
"eval_steps_per_second": 2.503,
"step": 88000
},
{
"epoch": 0.74,
"learning_rate": 7.925879996983585e-05,
"loss": 3.0117,
"step": 88100
},
{
"epoch": 0.74,
"learning_rate": 7.900743210973044e-05,
"loss": 3.0092,
"step": 88200
},
{
"epoch": 0.74,
"learning_rate": 7.875606424962505e-05,
"loss": 3.0124,
"step": 88300
},
{
"epoch": 0.74,
"learning_rate": 7.850469638951964e-05,
"loss": 3.0104,
"step": 88400
},
{
"epoch": 0.74,
"learning_rate": 7.825584220801528e-05,
"loss": 3.0136,
"step": 88500
},
{
"epoch": 0.74,
"learning_rate": 7.800447434790988e-05,
"loss": 3.0186,
"step": 88600
},
{
"epoch": 0.74,
"learning_rate": 7.775310648780447e-05,
"loss": 3.0107,
"step": 88700
},
{
"epoch": 0.74,
"learning_rate": 7.750173862769906e-05,
"loss": 3.0129,
"step": 88800
},
{
"epoch": 0.74,
"learning_rate": 7.725037076759365e-05,
"loss": 3.0117,
"step": 88900
},
{
"epoch": 0.74,
"learning_rate": 7.699900290748825e-05,
"loss": 3.0088,
"step": 89000
},
{
"epoch": 0.74,
"eval_accuracy": 0.4338164264773608,
"eval_loss": 3.031200647354126,
"eval_runtime": 36.6187,
"eval_samples_per_second": 306.292,
"eval_steps_per_second": 2.567,
"step": 89000
},
{
"epoch": 0.75,
"learning_rate": 7.674763504738283e-05,
"loss": 3.0107,
"step": 89100
},
{
"epoch": 0.75,
"learning_rate": 7.649626718727742e-05,
"loss": 3.0104,
"step": 89200
},
{
"epoch": 0.75,
"learning_rate": 7.624489932717202e-05,
"loss": 3.0141,
"step": 89300
},
{
"epoch": 0.75,
"learning_rate": 7.59935314670666e-05,
"loss": 3.0263,
"step": 89400
},
{
"epoch": 0.75,
"learning_rate": 7.57421636069612e-05,
"loss": 3.0093,
"step": 89500
},
{
"epoch": 0.75,
"learning_rate": 7.54907957468558e-05,
"loss": 3.0057,
"step": 89600
},
{
"epoch": 0.75,
"learning_rate": 7.52394278867504e-05,
"loss": 3.0104,
"step": 89700
},
{
"epoch": 0.75,
"learning_rate": 7.498806002664499e-05,
"loss": 3.0202,
"step": 89800
},
{
"epoch": 0.75,
"learning_rate": 7.473669216653958e-05,
"loss": 3.0118,
"step": 89900
},
{
"epoch": 0.75,
"learning_rate": 7.448532430643417e-05,
"loss": 3.0087,
"step": 90000
},
{
"epoch": 0.75,
"eval_accuracy": 0.43394138856751324,
"eval_loss": 3.0297725200653076,
"eval_runtime": 36.7079,
"eval_samples_per_second": 305.547,
"eval_steps_per_second": 2.561,
"step": 90000
},
{
"epoch": 0.75,
"learning_rate": 7.423395644632877e-05,
"loss": 3.0163,
"step": 90100
},
{
"epoch": 0.75,
"learning_rate": 7.398258858622336e-05,
"loss": 3.0168,
"step": 90200
},
{
"epoch": 0.76,
"learning_rate": 7.373122072611796e-05,
"loss": 3.0145,
"step": 90300
},
{
"epoch": 0.76,
"learning_rate": 7.347985286601255e-05,
"loss": 3.0112,
"step": 90400
},
{
"epoch": 0.76,
"learning_rate": 7.322848500590714e-05,
"loss": 3.0094,
"step": 90500
},
{
"epoch": 0.76,
"learning_rate": 7.297711714580173e-05,
"loss": 3.0129,
"step": 90600
},
{
"epoch": 0.76,
"learning_rate": 7.272574928569632e-05,
"loss": 3.0033,
"step": 90700
},
{
"epoch": 0.76,
"learning_rate": 7.247438142559092e-05,
"loss": 3.0115,
"step": 90800
},
{
"epoch": 0.76,
"learning_rate": 7.222301356548552e-05,
"loss": 3.0075,
"step": 90900
},
{
"epoch": 0.76,
"learning_rate": 7.197164570538011e-05,
"loss": 3.0134,
"step": 91000
},
{
"epoch": 0.76,
"eval_accuracy": 0.43423554090332145,
"eval_loss": 3.0285885334014893,
"eval_runtime": 36.591,
"eval_samples_per_second": 306.523,
"eval_steps_per_second": 2.569,
"step": 91000
},
{
"epoch": 0.76,
"learning_rate": 7.172279152387576e-05,
"loss": 3.019,
"step": 91100
},
{
"epoch": 0.76,
"learning_rate": 7.147142366377035e-05,
"loss": 3.0166,
"step": 91200
},
{
"epoch": 0.76,
"learning_rate": 7.122005580366494e-05,
"loss": 3.0114,
"step": 91300
},
{
"epoch": 0.76,
"learning_rate": 7.097120162216059e-05,
"loss": 3.015,
"step": 91400
},
{
"epoch": 0.77,
"learning_rate": 7.071983376205518e-05,
"loss": 3.0123,
"step": 91500
},
{
"epoch": 0.77,
"learning_rate": 7.046846590194977e-05,
"loss": 3.007,
"step": 91600
},
{
"epoch": 0.77,
"learning_rate": 7.021709804184436e-05,
"loss": 3.005,
"step": 91700
},
{
"epoch": 0.77,
"learning_rate": 6.996573018173895e-05,
"loss": 3.0122,
"step": 91800
},
{
"epoch": 0.77,
"learning_rate": 6.971436232163356e-05,
"loss": 3.0069,
"step": 91900
},
{
"epoch": 0.77,
"learning_rate": 6.946299446152815e-05,
"loss": 3.0136,
"step": 92000
},
{
"epoch": 0.77,
"eval_accuracy": 0.43437735181461806,
"eval_loss": 3.0268590450286865,
"eval_runtime": 36.7262,
"eval_samples_per_second": 305.395,
"eval_steps_per_second": 2.559,
"step": 92000
},
{
"epoch": 0.77,
"learning_rate": 6.921162660142274e-05,
"loss": 3.0063,
"step": 92100
},
{
"epoch": 0.77,
"learning_rate": 6.896025874131733e-05,
"loss": 3.007,
"step": 92200
},
{
"epoch": 0.77,
"learning_rate": 6.870889088121192e-05,
"loss": 3.0132,
"step": 92300
},
{
"epoch": 0.77,
"learning_rate": 6.845752302110651e-05,
"loss": 3.0145,
"step": 92400
},
{
"epoch": 0.77,
"learning_rate": 6.82061551610011e-05,
"loss": 3.0116,
"step": 92500
},
{
"epoch": 0.77,
"learning_rate": 6.79547873008957e-05,
"loss": 3.0138,
"step": 92600
},
{
"epoch": 0.78,
"learning_rate": 6.77034194407903e-05,
"loss": 3.0075,
"step": 92700
},
{
"epoch": 0.78,
"learning_rate": 6.745205158068489e-05,
"loss": 3.0098,
"step": 92800
},
{
"epoch": 0.78,
"learning_rate": 6.720068372057948e-05,
"loss": 3.0058,
"step": 92900
},
{
"epoch": 0.78,
"learning_rate": 6.694931586047407e-05,
"loss": 3.0043,
"step": 93000
},
{
"epoch": 0.78,
"eval_accuracy": 0.43468133262942704,
"eval_loss": 3.0255324840545654,
"eval_runtime": 36.1761,
"eval_samples_per_second": 310.039,
"eval_steps_per_second": 2.598,
"step": 93000
},
{
"epoch": 0.78,
"learning_rate": 6.669794800036866e-05,
"loss": 3.0167,
"step": 93100
},
{
"epoch": 0.78,
"learning_rate": 6.644658014026327e-05,
"loss": 3.0077,
"step": 93200
},
{
"epoch": 0.78,
"learning_rate": 6.619521228015785e-05,
"loss": 3.0087,
"step": 93300
},
{
"epoch": 0.78,
"learning_rate": 6.594384442005244e-05,
"loss": 3.0137,
"step": 93400
},
{
"epoch": 0.78,
"learning_rate": 6.569499023854809e-05,
"loss": 3.015,
"step": 93500
},
{
"epoch": 0.78,
"learning_rate": 6.544362237844268e-05,
"loss": 3.0046,
"step": 93600
},
{
"epoch": 0.78,
"learning_rate": 6.519225451833728e-05,
"loss": 3.0015,
"step": 93700
},
{
"epoch": 0.78,
"learning_rate": 6.494088665823187e-05,
"loss": 3.0074,
"step": 93800
},
{
"epoch": 0.79,
"learning_rate": 6.468951879812646e-05,
"loss": 3.0082,
"step": 93900
},
{
"epoch": 0.79,
"learning_rate": 6.443815093802106e-05,
"loss": 2.9995,
"step": 94000
},
{
"epoch": 0.79,
"eval_accuracy": 0.43484701270401116,
"eval_loss": 3.023953914642334,
"eval_runtime": 36.328,
"eval_samples_per_second": 308.742,
"eval_steps_per_second": 2.588,
"step": 94000
},
{
"epoch": 0.79,
"learning_rate": 6.418678307791566e-05,
"loss": 3.0039,
"step": 94100
},
{
"epoch": 0.79,
"learning_rate": 6.393541521781025e-05,
"loss": 3.0095,
"step": 94200
},
{
"epoch": 0.79,
"learning_rate": 6.368404735770483e-05,
"loss": 3.0028,
"step": 94300
},
{
"epoch": 0.79,
"learning_rate": 6.343267949759943e-05,
"loss": 3.0082,
"step": 94400
},
{
"epoch": 0.79,
"learning_rate": 6.318131163749403e-05,
"loss": 3.0069,
"step": 94500
},
{
"epoch": 0.79,
"learning_rate": 6.292994377738862e-05,
"loss": 3.0004,
"step": 94600
},
{
"epoch": 0.79,
"learning_rate": 6.267857591728321e-05,
"loss": 3.0087,
"step": 94700
},
{
"epoch": 0.79,
"learning_rate": 6.24272080571778e-05,
"loss": 3.0062,
"step": 94800
},
{
"epoch": 0.79,
"learning_rate": 6.21758401970724e-05,
"loss": 3.0113,
"step": 94900
},
{
"epoch": 0.79,
"learning_rate": 6.1924472336967e-05,
"loss": 3.001,
"step": 95000
},
{
"epoch": 0.79,
"eval_accuracy": 0.434945999528233,
"eval_loss": 3.0230536460876465,
"eval_runtime": 36.4523,
"eval_samples_per_second": 307.69,
"eval_steps_per_second": 2.579,
"step": 95000
},
{
"epoch": 0.8,
"learning_rate": 6.167310447686157e-05,
"loss": 3.0026,
"step": 95100
},
{
"epoch": 0.8,
"learning_rate": 6.142173661675618e-05,
"loss": 3.01,
"step": 95200
},
{
"epoch": 0.8,
"learning_rate": 6.117036875665077e-05,
"loss": 3.0073,
"step": 95300
},
{
"epoch": 0.8,
"learning_rate": 6.091900089654536e-05,
"loss": 3.0101,
"step": 95400
},
{
"epoch": 0.8,
"learning_rate": 6.067014671504101e-05,
"loss": 3.0013,
"step": 95500
},
{
"epoch": 0.8,
"learning_rate": 6.04187788549356e-05,
"loss": 3.004,
"step": 95600
},
{
"epoch": 0.8,
"learning_rate": 6.01674109948302e-05,
"loss": 3.0042,
"step": 95700
},
{
"epoch": 0.8,
"learning_rate": 5.991604313472479e-05,
"loss": 3.0081,
"step": 95800
},
{
"epoch": 0.8,
"learning_rate": 5.966467527461938e-05,
"loss": 3.0048,
"step": 95900
},
{
"epoch": 0.8,
"learning_rate": 5.941582109311503e-05,
"loss": 3.007,
"step": 96000
},
{
"epoch": 0.8,
"eval_accuracy": 0.4351959237085379,
"eval_loss": 3.02174973487854,
"eval_runtime": 36.367,
"eval_samples_per_second": 308.412,
"eval_steps_per_second": 2.585,
"step": 96000
},
{
"epoch": 0.8,
"learning_rate": 5.9164453233009627e-05,
"loss": 3.006,
"step": 96100
},
{
"epoch": 0.8,
"learning_rate": 5.891308537290422e-05,
"loss": 3.0025,
"step": 96200
},
{
"epoch": 0.81,
"learning_rate": 5.866171751279881e-05,
"loss": 3.006,
"step": 96300
},
{
"epoch": 0.81,
"learning_rate": 5.841034965269341e-05,
"loss": 2.9956,
"step": 96400
},
{
"epoch": 0.81,
"learning_rate": 5.815898179258799e-05,
"loss": 2.9968,
"step": 96500
},
{
"epoch": 0.81,
"learning_rate": 5.790761393248258e-05,
"loss": 3.0023,
"step": 96600
},
{
"epoch": 0.81,
"learning_rate": 5.765624607237718e-05,
"loss": 3.0014,
"step": 96700
},
{
"epoch": 0.81,
"learning_rate": 5.740487821227177e-05,
"loss": 2.9961,
"step": 96800
},
{
"epoch": 0.81,
"learning_rate": 5.715351035216637e-05,
"loss": 3.0024,
"step": 96900
},
{
"epoch": 0.81,
"learning_rate": 5.690214249206096e-05,
"loss": 3.0035,
"step": 97000
},
{
"epoch": 0.81,
"eval_accuracy": 0.43532720410661935,
"eval_loss": 3.02020263671875,
"eval_runtime": 37.453,
"eval_samples_per_second": 299.469,
"eval_steps_per_second": 2.51,
"step": 97000
},
{
"epoch": 0.81,
"learning_rate": 5.665328831055661e-05,
"loss": 3.0032,
"step": 97100
},
{
"epoch": 0.81,
"learning_rate": 5.64019204504512e-05,
"loss": 2.9961,
"step": 97200
},
{
"epoch": 0.81,
"learning_rate": 5.61505525903458e-05,
"loss": 3.0048,
"step": 97300
},
{
"epoch": 0.81,
"learning_rate": 5.589918473024039e-05,
"loss": 2.9939,
"step": 97400
},
{
"epoch": 0.82,
"learning_rate": 5.565033054873604e-05,
"loss": 2.9995,
"step": 97500
},
{
"epoch": 0.82,
"learning_rate": 5.539896268863063e-05,
"loss": 3.0067,
"step": 97600
},
{
"epoch": 0.82,
"learning_rate": 5.514759482852523e-05,
"loss": 2.9924,
"step": 97700
},
{
"epoch": 0.82,
"learning_rate": 5.489622696841981e-05,
"loss": 2.9997,
"step": 97800
},
{
"epoch": 0.82,
"learning_rate": 5.46448591083144e-05,
"loss": 3.0077,
"step": 97900
},
{
"epoch": 0.82,
"learning_rate": 5.4393491248209e-05,
"loss": 2.9966,
"step": 98000
},
{
"epoch": 0.82,
"eval_accuracy": 0.43553711233670683,
"eval_loss": 3.019421100616455,
"eval_runtime": 36.419,
"eval_samples_per_second": 307.971,
"eval_steps_per_second": 2.581,
"step": 98000
},
{
"epoch": 0.82,
"learning_rate": 5.414212338810359e-05,
"loss": 3.0027,
"step": 98100
},
{
"epoch": 0.82,
"learning_rate": 5.3890755527998184e-05,
"loss": 3.0008,
"step": 98200
},
{
"epoch": 0.82,
"learning_rate": 5.363938766789278e-05,
"loss": 3.0019,
"step": 98300
},
{
"epoch": 0.82,
"learning_rate": 5.338801980778737e-05,
"loss": 2.9993,
"step": 98400
},
{
"epoch": 0.82,
"learning_rate": 5.313665194768197e-05,
"loss": 3.0025,
"step": 98500
},
{
"epoch": 0.82,
"learning_rate": 5.2885284087576555e-05,
"loss": 2.9987,
"step": 98600
},
{
"epoch": 0.83,
"learning_rate": 5.263391622747115e-05,
"loss": 3.0054,
"step": 98700
},
{
"epoch": 0.83,
"learning_rate": 5.2382548367365745e-05,
"loss": 3.0064,
"step": 98800
},
{
"epoch": 0.83,
"learning_rate": 5.2131180507260336e-05,
"loss": 3.0096,
"step": 98900
},
{
"epoch": 0.83,
"learning_rate": 5.1879812647154934e-05,
"loss": 2.9881,
"step": 99000
},
{
"epoch": 0.83,
"eval_accuracy": 0.4356613723926449,
"eval_loss": 3.0177648067474365,
"eval_runtime": 37.6095,
"eval_samples_per_second": 298.223,
"eval_steps_per_second": 2.499,
"step": 99000
},
{
"epoch": 0.83,
"learning_rate": 5.1628444787049525e-05,
"loss": 3.0002,
"step": 99100
},
{
"epoch": 0.83,
"learning_rate": 5.1377076926944117e-05,
"loss": 2.9966,
"step": 99200
},
{
"epoch": 0.83,
"learning_rate": 5.1128222745439764e-05,
"loss": 3.0012,
"step": 99300
},
{
"epoch": 0.83,
"learning_rate": 5.0876854885334356e-05,
"loss": 2.9964,
"step": 99400
},
{
"epoch": 0.83,
"learning_rate": 5.0625487025228954e-05,
"loss": 2.9981,
"step": 99500
},
{
"epoch": 0.83,
"learning_rate": 5.037411916512354e-05,
"loss": 2.9986,
"step": 99600
},
{
"epoch": 0.83,
"learning_rate": 5.012275130501813e-05,
"loss": 2.9981,
"step": 99700
},
{
"epoch": 0.83,
"learning_rate": 4.987138344491273e-05,
"loss": 3.0057,
"step": 99800
},
{
"epoch": 0.84,
"learning_rate": 4.962001558480732e-05,
"loss": 2.994,
"step": 99900
},
{
"epoch": 0.84,
"learning_rate": 4.936864772470192e-05,
"loss": 3.0028,
"step": 100000
},
{
"epoch": 0.84,
"eval_accuracy": 0.43574631853258,
"eval_loss": 3.0173962116241455,
"eval_runtime": 36.2768,
"eval_samples_per_second": 309.179,
"eval_steps_per_second": 2.591,
"step": 100000
},
{
"epoch": 0.84,
"learning_rate": 4.911727986459651e-05,
"loss": 3.0028,
"step": 100100
},
{
"epoch": 0.84,
"learning_rate": 4.8865912004491106e-05,
"loss": 2.9969,
"step": 100200
},
{
"epoch": 0.84,
"learning_rate": 4.86145441443857e-05,
"loss": 3.0029,
"step": 100300
},
{
"epoch": 0.84,
"learning_rate": 4.836317628428029e-05,
"loss": 3.0033,
"step": 100400
},
{
"epoch": 0.84,
"learning_rate": 4.811180842417488e-05,
"loss": 2.9945,
"step": 100500
},
{
"epoch": 0.84,
"learning_rate": 4.786044056406947e-05,
"loss": 2.9985,
"step": 100600
},
{
"epoch": 0.84,
"learning_rate": 4.760907270396406e-05,
"loss": 2.9952,
"step": 100700
},
{
"epoch": 0.84,
"learning_rate": 4.735770484385866e-05,
"loss": 2.9859,
"step": 100800
},
{
"epoch": 0.84,
"learning_rate": 4.710633698375325e-05,
"loss": 2.9951,
"step": 100900
},
{
"epoch": 0.84,
"learning_rate": 4.685496912364785e-05,
"loss": 2.9933,
"step": 101000
},
{
"epoch": 0.84,
"eval_accuracy": 0.4362117672166871,
"eval_loss": 3.01594614982605,
"eval_runtime": 36.0518,
"eval_samples_per_second": 311.108,
"eval_steps_per_second": 2.607,
"step": 101000
},
{
"epoch": 0.85,
"learning_rate": 4.660360126354244e-05,
"loss": 2.9979,
"step": 101100
},
{
"epoch": 0.85,
"learning_rate": 4.635223340343704e-05,
"loss": 2.9961,
"step": 101200
},
{
"epoch": 0.85,
"learning_rate": 4.6100865543331624e-05,
"loss": 3.0076,
"step": 101300
},
{
"epoch": 0.85,
"learning_rate": 4.5849497683226215e-05,
"loss": 3.0,
"step": 101400
},
{
"epoch": 0.85,
"learning_rate": 4.559812982312081e-05,
"loss": 2.9964,
"step": 101500
},
{
"epoch": 0.85,
"learning_rate": 4.5346761963015404e-05,
"loss": 2.9951,
"step": 101600
},
{
"epoch": 0.85,
"learning_rate": 4.5095394102909996e-05,
"loss": 2.9964,
"step": 101700
},
{
"epoch": 0.85,
"learning_rate": 4.4844026242804594e-05,
"loss": 3.0034,
"step": 101800
},
{
"epoch": 0.85,
"learning_rate": 4.4592658382699185e-05,
"loss": 2.994,
"step": 101900
},
{
"epoch": 0.85,
"learning_rate": 4.434129052259378e-05,
"loss": 3.0002,
"step": 102000
},
{
"epoch": 0.85,
"eval_accuracy": 0.43605310748424636,
"eval_loss": 3.01462721824646,
"eval_runtime": 36.4761,
"eval_samples_per_second": 307.489,
"eval_steps_per_second": 2.577,
"step": 102000
},
{
"epoch": 0.85,
"learning_rate": 4.408992266248837e-05,
"loss": 2.9951,
"step": 102100
},
{
"epoch": 0.85,
"learning_rate": 4.383855480238296e-05,
"loss": 2.9959,
"step": 102200
},
{
"epoch": 0.86,
"learning_rate": 4.358718694227756e-05,
"loss": 2.9922,
"step": 102300
},
{
"epoch": 0.86,
"learning_rate": 4.333581908217215e-05,
"loss": 2.9888,
"step": 102400
},
{
"epoch": 0.86,
"learning_rate": 4.3084451222066746e-05,
"loss": 2.9951,
"step": 102500
},
{
"epoch": 0.86,
"learning_rate": 4.283559704056239e-05,
"loss": 2.994,
"step": 102600
},
{
"epoch": 0.86,
"learning_rate": 4.2586742859058035e-05,
"loss": 2.9969,
"step": 102700
},
{
"epoch": 0.86,
"learning_rate": 4.2335374998952626e-05,
"loss": 2.9959,
"step": 102800
},
{
"epoch": 0.86,
"learning_rate": 4.2084007138847224e-05,
"loss": 3.0063,
"step": 102900
},
{
"epoch": 0.86,
"learning_rate": 4.1832639278741816e-05,
"loss": 2.9901,
"step": 103000
},
{
"epoch": 0.86,
"eval_accuracy": 0.4364076347624878,
"eval_loss": 3.0131843090057373,
"eval_runtime": 36.6938,
"eval_samples_per_second": 305.665,
"eval_steps_per_second": 2.562,
"step": 103000
},
{
"epoch": 0.86,
"learning_rate": 4.1581271418636414e-05,
"loss": 2.996,
"step": 103100
},
{
"epoch": 0.86,
"learning_rate": 4.1329903558531005e-05,
"loss": 2.9928,
"step": 103200
},
{
"epoch": 0.86,
"learning_rate": 4.1078535698425596e-05,
"loss": 2.9981,
"step": 103300
},
{
"epoch": 0.86,
"learning_rate": 4.082716783832019e-05,
"loss": 2.9999,
"step": 103400
},
{
"epoch": 0.87,
"learning_rate": 4.057579997821478e-05,
"loss": 2.9879,
"step": 103500
},
{
"epoch": 0.87,
"learning_rate": 4.032443211810937e-05,
"loss": 2.9927,
"step": 103600
},
{
"epoch": 0.87,
"learning_rate": 4.007306425800397e-05,
"loss": 2.997,
"step": 103700
},
{
"epoch": 0.87,
"learning_rate": 3.982169639789856e-05,
"loss": 2.9899,
"step": 103800
},
{
"epoch": 0.87,
"learning_rate": 3.957032853779316e-05,
"loss": 3.0014,
"step": 103900
},
{
"epoch": 0.87,
"learning_rate": 3.931896067768775e-05,
"loss": 2.9895,
"step": 104000
},
{
"epoch": 0.87,
"eval_accuracy": 0.4363837655992002,
"eval_loss": 3.012049674987793,
"eval_runtime": 36.7749,
"eval_samples_per_second": 304.99,
"eval_steps_per_second": 2.556,
"step": 104000
},
{
"epoch": 0.87,
"learning_rate": 3.906759281758235e-05,
"loss": 3.0,
"step": 104100
},
{
"epoch": 0.87,
"learning_rate": 3.881622495747693e-05,
"loss": 2.9981,
"step": 104200
},
{
"epoch": 0.87,
"learning_rate": 3.856485709737152e-05,
"loss": 2.9975,
"step": 104300
},
{
"epoch": 0.87,
"learning_rate": 3.831600291586718e-05,
"loss": 3.0007,
"step": 104400
},
{
"epoch": 0.87,
"learning_rate": 3.806463505576176e-05,
"loss": 2.9958,
"step": 104500
},
{
"epoch": 0.87,
"learning_rate": 3.781326719565636e-05,
"loss": 2.9939,
"step": 104600
},
{
"epoch": 0.88,
"learning_rate": 3.756189933555095e-05,
"loss": 2.9936,
"step": 104700
},
{
"epoch": 0.88,
"learning_rate": 3.731053147544555e-05,
"loss": 3.0004,
"step": 104800
},
{
"epoch": 0.88,
"learning_rate": 3.705916361534014e-05,
"loss": 2.9945,
"step": 104900
},
{
"epoch": 0.88,
"learning_rate": 3.680779575523473e-05,
"loss": 2.9882,
"step": 105000
},
{
"epoch": 0.88,
"eval_accuracy": 0.4366730036955081,
"eval_loss": 3.0106048583984375,
"eval_runtime": 36.7107,
"eval_samples_per_second": 305.524,
"eval_steps_per_second": 2.561,
"step": 105000
},
{
"epoch": 0.88,
"learning_rate": 3.655642789512932e-05,
"loss": 2.9919,
"step": 105100
},
{
"epoch": 0.88,
"learning_rate": 3.630506003502392e-05,
"loss": 2.9894,
"step": 105200
},
{
"epoch": 0.88,
"learning_rate": 3.605369217491851e-05,
"loss": 3.0005,
"step": 105300
},
{
"epoch": 0.88,
"learning_rate": 3.580483799341416e-05,
"loss": 2.9884,
"step": 105400
},
{
"epoch": 0.88,
"learning_rate": 3.555347013330875e-05,
"loss": 2.9865,
"step": 105500
},
{
"epoch": 0.88,
"learning_rate": 3.530210227320335e-05,
"loss": 2.9909,
"step": 105600
},
{
"epoch": 0.88,
"learning_rate": 3.5050734413097934e-05,
"loss": 2.9961,
"step": 105700
},
{
"epoch": 0.89,
"learning_rate": 3.479936655299253e-05,
"loss": 2.9905,
"step": 105800
},
{
"epoch": 0.89,
"learning_rate": 3.454799869288712e-05,
"loss": 2.9913,
"step": 105900
},
{
"epoch": 0.89,
"learning_rate": 3.429914451138277e-05,
"loss": 2.9866,
"step": 106000
},
{
"epoch": 0.89,
"eval_accuracy": 0.4369524133128152,
"eval_loss": 3.008857250213623,
"eval_runtime": 36.004,
"eval_samples_per_second": 311.521,
"eval_steps_per_second": 2.611,
"step": 106000
},
{
"epoch": 0.89,
"learning_rate": 3.404777665127736e-05,
"loss": 2.9893,
"step": 106100
},
{
"epoch": 0.89,
"learning_rate": 3.379640879117196e-05,
"loss": 2.989,
"step": 106200
},
{
"epoch": 0.89,
"learning_rate": 3.354504093106655e-05,
"loss": 2.9886,
"step": 106300
},
{
"epoch": 0.89,
"learning_rate": 3.329367307096114e-05,
"loss": 2.9835,
"step": 106400
},
{
"epoch": 0.89,
"learning_rate": 3.3042305210855734e-05,
"loss": 2.9918,
"step": 106500
},
{
"epoch": 0.89,
"learning_rate": 3.279093735075033e-05,
"loss": 2.9855,
"step": 106600
},
{
"epoch": 0.89,
"learning_rate": 3.2539569490644923e-05,
"loss": 2.9895,
"step": 106700
},
{
"epoch": 0.89,
"learning_rate": 3.229071530914057e-05,
"loss": 2.9791,
"step": 106800
},
{
"epoch": 0.89,
"learning_rate": 3.203934744903516e-05,
"loss": 2.9955,
"step": 106900
},
{
"epoch": 0.9,
"learning_rate": 3.178797958892976e-05,
"loss": 2.9961,
"step": 107000
},
{
"epoch": 0.9,
"eval_accuracy": 0.43725920226448156,
"eval_loss": 3.007978677749634,
"eval_runtime": 36.4349,
"eval_samples_per_second": 307.837,
"eval_steps_per_second": 2.58,
"step": 107000
},
{
"epoch": 0.9,
"learning_rate": 3.153661172882435e-05,
"loss": 2.9921,
"step": 107100
},
{
"epoch": 0.9,
"learning_rate": 3.128524386871894e-05,
"loss": 2.9937,
"step": 107200
},
{
"epoch": 0.9,
"learning_rate": 3.1033876008613534e-05,
"loss": 2.9894,
"step": 107300
},
{
"epoch": 0.9,
"learning_rate": 3.078250814850813e-05,
"loss": 2.9919,
"step": 107400
},
{
"epoch": 0.9,
"learning_rate": 3.0531140288402724e-05,
"loss": 2.9906,
"step": 107500
},
{
"epoch": 0.9,
"learning_rate": 3.0279772428297315e-05,
"loss": 2.9839,
"step": 107600
},
{
"epoch": 0.9,
"learning_rate": 3.002840456819191e-05,
"loss": 2.9871,
"step": 107700
},
{
"epoch": 0.9,
"learning_rate": 2.9777036708086504e-05,
"loss": 2.9891,
"step": 107800
},
{
"epoch": 0.9,
"learning_rate": 2.9525668847981092e-05,
"loss": 2.9898,
"step": 107900
},
{
"epoch": 0.9,
"learning_rate": 2.9274300987875687e-05,
"loss": 2.9876,
"step": 108000
},
{
"epoch": 0.9,
"eval_accuracy": 0.4373946948678491,
"eval_loss": 3.0067296028137207,
"eval_runtime": 36.3734,
"eval_samples_per_second": 308.357,
"eval_steps_per_second": 2.584,
"step": 108000
},
{
"epoch": 0.9,
"learning_rate": 2.902293312777028e-05,
"loss": 2.9917,
"step": 108100
},
{
"epoch": 0.91,
"learning_rate": 2.8771565267664876e-05,
"loss": 2.9916,
"step": 108200
},
{
"epoch": 0.91,
"learning_rate": 2.8520197407559464e-05,
"loss": 2.9981,
"step": 108300
},
{
"epoch": 0.91,
"learning_rate": 2.826882954745406e-05,
"loss": 2.9817,
"step": 108400
},
{
"epoch": 0.91,
"learning_rate": 2.8017461687348653e-05,
"loss": 2.9875,
"step": 108500
},
{
"epoch": 0.91,
"learning_rate": 2.7766093827243248e-05,
"loss": 2.9904,
"step": 108600
},
{
"epoch": 0.91,
"learning_rate": 2.751472596713784e-05,
"loss": 2.9882,
"step": 108700
},
{
"epoch": 0.91,
"learning_rate": 2.7265871785633487e-05,
"loss": 2.9885,
"step": 108800
},
{
"epoch": 0.91,
"learning_rate": 2.701450392552808e-05,
"loss": 2.9898,
"step": 108900
},
{
"epoch": 0.91,
"learning_rate": 2.6763136065422673e-05,
"loss": 2.9873,
"step": 109000
},
{
"epoch": 0.91,
"eval_accuracy": 0.43755826883979015,
"eval_loss": 3.0054852962493896,
"eval_runtime": 36.8956,
"eval_samples_per_second": 303.993,
"eval_steps_per_second": 2.548,
"step": 109000
},
{
"epoch": 0.91,
"learning_rate": 2.6511768205317264e-05,
"loss": 2.9921,
"step": 109100
},
{
"epoch": 0.91,
"learning_rate": 2.626040034521186e-05,
"loss": 2.9863,
"step": 109200
},
{
"epoch": 0.91,
"learning_rate": 2.600903248510645e-05,
"loss": 2.9902,
"step": 109300
},
{
"epoch": 0.92,
"learning_rate": 2.5757664625001045e-05,
"loss": 2.9823,
"step": 109400
},
{
"epoch": 0.92,
"learning_rate": 2.5508810443496693e-05,
"loss": 2.9978,
"step": 109500
},
{
"epoch": 0.92,
"learning_rate": 2.5257442583391284e-05,
"loss": 2.9859,
"step": 109600
},
{
"epoch": 0.92,
"learning_rate": 2.500607472328588e-05,
"loss": 2.9821,
"step": 109700
},
{
"epoch": 0.92,
"learning_rate": 2.4754706863180473e-05,
"loss": 2.9932,
"step": 109800
},
{
"epoch": 0.92,
"learning_rate": 2.4503339003075065e-05,
"loss": 2.9906,
"step": 109900
},
{
"epoch": 0.92,
"learning_rate": 2.4251971142969656e-05,
"loss": 2.9891,
"step": 110000
},
{
"epoch": 0.92,
"eval_accuracy": 0.4375182528895728,
"eval_loss": 3.004079580307007,
"eval_runtime": 36.2219,
"eval_samples_per_second": 309.647,
"eval_steps_per_second": 2.595,
"step": 110000
},
{
"epoch": 0.92,
"learning_rate": 2.400060328286425e-05,
"loss": 2.9875,
"step": 110100
},
{
"epoch": 0.92,
"learning_rate": 2.3749235422758845e-05,
"loss": 2.9859,
"step": 110200
},
{
"epoch": 0.92,
"learning_rate": 2.349786756265344e-05,
"loss": 2.9865,
"step": 110300
},
{
"epoch": 0.92,
"learning_rate": 2.3246499702548028e-05,
"loss": 2.994,
"step": 110400
},
{
"epoch": 0.92,
"learning_rate": 2.2995131842442623e-05,
"loss": 2.9817,
"step": 110500
},
{
"epoch": 0.93,
"learning_rate": 2.2743763982337217e-05,
"loss": 2.9915,
"step": 110600
},
{
"epoch": 0.93,
"learning_rate": 2.2492396122231812e-05,
"loss": 2.9927,
"step": 110700
},
{
"epoch": 0.93,
"learning_rate": 2.22410282621264e-05,
"loss": 2.9908,
"step": 110800
},
{
"epoch": 0.93,
"learning_rate": 2.1989660402020994e-05,
"loss": 2.9897,
"step": 110900
},
{
"epoch": 0.93,
"learning_rate": 2.173829254191559e-05,
"loss": 2.9835,
"step": 111000
},
{
"epoch": 0.93,
"eval_accuracy": 0.4377632628303773,
"eval_loss": 3.0032153129577637,
"eval_runtime": 36.5662,
"eval_samples_per_second": 306.731,
"eval_steps_per_second": 2.571,
"step": 111000
},
{
"epoch": 0.93,
"learning_rate": 2.1486924681810184e-05,
"loss": 2.9787,
"step": 111100
},
{
"epoch": 0.93,
"learning_rate": 2.123555682170477e-05,
"loss": 2.9831,
"step": 111200
},
{
"epoch": 0.93,
"learning_rate": 2.0984188961599366e-05,
"loss": 2.9913,
"step": 111300
},
{
"epoch": 0.93,
"learning_rate": 2.073282110149396e-05,
"loss": 2.9904,
"step": 111400
},
{
"epoch": 0.93,
"learning_rate": 2.0481453241388556e-05,
"loss": 2.9842,
"step": 111500
},
{
"epoch": 0.93,
"learning_rate": 2.0230085381283147e-05,
"loss": 2.987,
"step": 111600
},
{
"epoch": 0.93,
"learning_rate": 1.9978717521177738e-05,
"loss": 2.9868,
"step": 111700
},
{
"epoch": 0.94,
"learning_rate": 1.9727349661072333e-05,
"loss": 2.9887,
"step": 111800
},
{
"epoch": 0.94,
"learning_rate": 1.9475981800966928e-05,
"loss": 2.9844,
"step": 111900
},
{
"epoch": 0.94,
"learning_rate": 1.922461394086152e-05,
"loss": 2.9887,
"step": 112000
},
{
"epoch": 0.94,
"eval_accuracy": 0.4380391622766127,
"eval_loss": 3.0022435188293457,
"eval_runtime": 36.4456,
"eval_samples_per_second": 307.746,
"eval_steps_per_second": 2.579,
"step": 112000
},
{
"epoch": 0.94,
"learning_rate": 1.8973246080756113e-05,
"loss": 2.9792,
"step": 112100
},
{
"epoch": 0.94,
"learning_rate": 1.8721878220650705e-05,
"loss": 2.9813,
"step": 112200
},
{
"epoch": 0.94,
"learning_rate": 1.84705103605453e-05,
"loss": 2.9852,
"step": 112300
},
{
"epoch": 0.94,
"learning_rate": 1.821914250043989e-05,
"loss": 2.9927,
"step": 112400
},
{
"epoch": 0.94,
"learning_rate": 1.797028831893554e-05,
"loss": 2.9869,
"step": 112500
},
{
"epoch": 0.94,
"learning_rate": 1.7718920458830133e-05,
"loss": 2.9798,
"step": 112600
},
{
"epoch": 0.94,
"learning_rate": 1.7467552598724724e-05,
"loss": 2.982,
"step": 112700
},
{
"epoch": 0.94,
"learning_rate": 1.721618473861932e-05,
"loss": 2.9787,
"step": 112800
},
{
"epoch": 0.94,
"learning_rate": 1.6964816878513914e-05,
"loss": 2.9891,
"step": 112900
},
{
"epoch": 0.95,
"learning_rate": 1.671596269700956e-05,
"loss": 2.9876,
"step": 113000
},
{
"epoch": 0.95,
"eval_accuracy": 0.43829610679906095,
"eval_loss": 3.0009684562683105,
"eval_runtime": 37.5779,
"eval_samples_per_second": 298.473,
"eval_steps_per_second": 2.501,
"step": 113000
},
{
"epoch": 0.95,
"learning_rate": 1.6464594836904153e-05,
"loss": 2.9809,
"step": 113100
},
{
"epoch": 0.95,
"learning_rate": 1.6213226976798747e-05,
"loss": 2.9933,
"step": 113200
},
{
"epoch": 0.95,
"learning_rate": 1.596185911669334e-05,
"loss": 2.9868,
"step": 113300
},
{
"epoch": 0.95,
"learning_rate": 1.5710491256587933e-05,
"loss": 2.9867,
"step": 113400
},
{
"epoch": 0.95,
"learning_rate": 1.5459123396482525e-05,
"loss": 2.9831,
"step": 113500
},
{
"epoch": 0.95,
"learning_rate": 1.5207755536377118e-05,
"loss": 2.9857,
"step": 113600
},
{
"epoch": 0.95,
"learning_rate": 1.495638767627171e-05,
"loss": 2.9861,
"step": 113700
},
{
"epoch": 0.95,
"learning_rate": 1.4705019816166305e-05,
"loss": 2.9797,
"step": 113800
},
{
"epoch": 0.95,
"learning_rate": 1.4453651956060897e-05,
"loss": 2.9819,
"step": 113900
},
{
"epoch": 0.95,
"learning_rate": 1.4202284095955491e-05,
"loss": 2.9818,
"step": 114000
},
{
"epoch": 0.95,
"eval_accuracy": 0.4384379177103575,
"eval_loss": 2.9998745918273926,
"eval_runtime": 36.4806,
"eval_samples_per_second": 307.451,
"eval_steps_per_second": 2.577,
"step": 114000
},
{
"epoch": 0.95,
"learning_rate": 1.3950916235850083e-05,
"loss": 2.9861,
"step": 114100
},
{
"epoch": 0.96,
"learning_rate": 1.3699548375744677e-05,
"loss": 2.9859,
"step": 114200
},
{
"epoch": 0.96,
"learning_rate": 1.3448180515639268e-05,
"loss": 2.9864,
"step": 114300
},
{
"epoch": 0.96,
"learning_rate": 1.3196812655533863e-05,
"loss": 2.9818,
"step": 114400
},
{
"epoch": 0.96,
"learning_rate": 1.2945444795428454e-05,
"loss": 2.9732,
"step": 114500
},
{
"epoch": 0.96,
"learning_rate": 1.2694076935323049e-05,
"loss": 2.9859,
"step": 114600
},
{
"epoch": 0.96,
"learning_rate": 1.244270907521764e-05,
"loss": 2.9828,
"step": 114700
},
{
"epoch": 0.96,
"learning_rate": 1.2191341215112235e-05,
"loss": 2.9837,
"step": 114800
},
{
"epoch": 0.96,
"learning_rate": 1.1939973355006828e-05,
"loss": 2.9748,
"step": 114900
},
{
"epoch": 0.96,
"learning_rate": 1.1688605494901421e-05,
"loss": 2.9797,
"step": 115000
},
{
"epoch": 0.96,
"eval_accuracy": 0.43843651364192887,
"eval_loss": 2.999021291732788,
"eval_runtime": 36.1681,
"eval_samples_per_second": 310.108,
"eval_steps_per_second": 2.599,
"step": 115000
},
{
"epoch": 0.96,
"learning_rate": 1.1437237634796014e-05,
"loss": 2.9813,
"step": 115100
},
{
"epoch": 0.96,
"learning_rate": 1.1185869774690607e-05,
"loss": 2.978,
"step": 115200
},
{
"epoch": 0.96,
"learning_rate": 1.09345019145852e-05,
"loss": 2.9886,
"step": 115300
},
{
"epoch": 0.97,
"learning_rate": 1.0683134054479795e-05,
"loss": 2.9744,
"step": 115400
},
{
"epoch": 0.97,
"learning_rate": 1.0431766194374386e-05,
"loss": 2.9804,
"step": 115500
},
{
"epoch": 0.97,
"learning_rate": 1.0182912012870034e-05,
"loss": 2.984,
"step": 115600
},
{
"epoch": 0.97,
"learning_rate": 9.931544152764628e-06,
"loss": 2.985,
"step": 115700
},
{
"epoch": 0.97,
"learning_rate": 9.68017629265922e-06,
"loss": 2.9843,
"step": 115800
},
{
"epoch": 0.97,
"learning_rate": 9.428808432553814e-06,
"loss": 2.9809,
"step": 115900
},
{
"epoch": 0.97,
"learning_rate": 9.177440572448405e-06,
"loss": 2.9842,
"step": 116000
},
{
"epoch": 0.97,
"eval_accuracy": 0.43876225751738235,
"eval_loss": 2.9980885982513428,
"eval_runtime": 36.1964,
"eval_samples_per_second": 309.865,
"eval_steps_per_second": 2.597,
"step": 116000
},
{
"epoch": 0.97,
"learning_rate": 8.926072712342998e-06,
"loss": 2.9702,
"step": 116100
},
{
"epoch": 0.97,
"learning_rate": 8.674704852237591e-06,
"loss": 2.9799,
"step": 116200
},
{
"epoch": 0.97,
"learning_rate": 8.423336992132184e-06,
"loss": 2.9825,
"step": 116300
},
{
"epoch": 0.97,
"learning_rate": 8.171969132026777e-06,
"loss": 2.9726,
"step": 116400
},
{
"epoch": 0.97,
"learning_rate": 7.920601271921372e-06,
"loss": 2.9788,
"step": 116500
},
{
"epoch": 0.98,
"learning_rate": 7.669233411815965e-06,
"loss": 2.988,
"step": 116600
},
{
"epoch": 0.98,
"learning_rate": 7.417865551710557e-06,
"loss": 2.9795,
"step": 116700
},
{
"epoch": 0.98,
"learning_rate": 7.16649769160515e-06,
"loss": 2.9797,
"step": 116800
},
{
"epoch": 0.98,
"learning_rate": 6.915129831499744e-06,
"loss": 2.9735,
"step": 116900
},
{
"epoch": 0.98,
"learning_rate": 6.663761971394337e-06,
"loss": 2.9739,
"step": 117000
},
{
"epoch": 0.98,
"eval_accuracy": 0.43866397272737484,
"eval_loss": 2.9972493648529053,
"eval_runtime": 36.7078,
"eval_samples_per_second": 305.548,
"eval_steps_per_second": 2.561,
"step": 117000
},
{
"epoch": 0.98,
"learning_rate": 6.41239411128893e-06,
"loss": 2.9765,
"step": 117100
},
{
"epoch": 0.98,
"learning_rate": 6.161026251183523e-06,
"loss": 2.9859,
"step": 117200
},
{
"epoch": 0.98,
"learning_rate": 5.909658391078116e-06,
"loss": 2.9897,
"step": 117300
},
{
"epoch": 0.98,
"learning_rate": 5.658290530972709e-06,
"loss": 2.9855,
"step": 117400
},
{
"epoch": 0.98,
"learning_rate": 5.406922670867302e-06,
"loss": 2.9747,
"step": 117500
},
{
"epoch": 0.98,
"learning_rate": 5.155554810761895e-06,
"loss": 2.9732,
"step": 117600
},
{
"epoch": 0.98,
"learning_rate": 4.9041869506564885e-06,
"loss": 2.9796,
"step": 117700
},
{
"epoch": 0.99,
"learning_rate": 4.6528190905510815e-06,
"loss": 2.9782,
"step": 117800
},
{
"epoch": 0.99,
"learning_rate": 4.4014512304456745e-06,
"loss": 2.9841,
"step": 117900
},
{
"epoch": 0.99,
"learning_rate": 4.150083370340268e-06,
"loss": 2.9804,
"step": 118000
},
{
"epoch": 0.99,
"eval_accuracy": 0.43883737517831667,
"eval_loss": 2.9965155124664307,
"eval_runtime": 36.1347,
"eval_samples_per_second": 310.394,
"eval_steps_per_second": 2.601,
"step": 118000
},
{
"epoch": 0.99,
"learning_rate": 3.898715510234861e-06,
"loss": 2.9836,
"step": 118100
},
{
"epoch": 0.99,
"learning_rate": 3.6473476501294542e-06,
"loss": 2.9815,
"step": 118200
},
{
"epoch": 0.99,
"learning_rate": 3.398493468625101e-06,
"loss": 2.9744,
"step": 118300
},
{
"epoch": 0.99,
"learning_rate": 3.147125608519694e-06,
"loss": 2.9847,
"step": 118400
},
{
"epoch": 0.99,
"learning_rate": 2.8957577484142875e-06,
"loss": 2.9733,
"step": 118500
},
{
"epoch": 0.99,
"learning_rate": 2.6469035669099345e-06,
"loss": 2.9766,
"step": 118600
},
{
"epoch": 0.99,
"learning_rate": 2.395535706804528e-06,
"loss": 2.9802,
"step": 118700
},
{
"epoch": 0.99,
"learning_rate": 2.144167846699121e-06,
"loss": 2.9757,
"step": 118800
},
{
"epoch": 0.99,
"learning_rate": 1.8927999865937138e-06,
"loss": 2.9775,
"step": 118900
},
{
"epoch": 1.0,
"learning_rate": 1.641432126488307e-06,
"loss": 2.9828,
"step": 119000
},
{
"epoch": 1.0,
"eval_accuracy": 0.43901218169768724,
"eval_loss": 2.995953321456909,
"eval_runtime": 36.3994,
"eval_samples_per_second": 308.137,
"eval_steps_per_second": 2.582,
"step": 119000
},
{
"epoch": 1.0,
"learning_rate": 1.3900642663829e-06,
"loss": 2.9783,
"step": 119100
},
{
"epoch": 1.0,
"learning_rate": 1.1386964062774932e-06,
"loss": 2.9723,
"step": 119200
},
{
"epoch": 1.0,
"learning_rate": 8.873285461720863e-07,
"loss": 2.9817,
"step": 119300
},
{
"epoch": 1.0,
"learning_rate": 6.359606860666794e-07,
"loss": 2.9792,
"step": 119400
},
{
"epoch": 1.0,
"learning_rate": 3.8459282596127255e-07,
"loss": 2.982,
"step": 119500
},
{
"epoch": 1.0,
"step": 119547,
"total_flos": 1.455921831670228e+20,
"train_loss": 3.081914688561298,
"train_runtime": 169290.0352,
"train_samples_per_second": 169.48,
"train_steps_per_second": 0.706
}
],
"max_steps": 119547,
"num_train_epochs": 1,
"total_flos": 1.455921831670228e+20,
"trial_name": null,
"trial_params": null
}