{ "best_metric": 0.02237132005393505, "best_model_checkpoint": "hurricane_model/checkpoint-1000", "epoch": 4.0, "eval_steps": 100, "global_step": 1252, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03194888178913738, "grad_norm": 1.788046956062317, "learning_rate": 0.00019840255591054313, "loss": 0.5871, "step": 10 }, { "epoch": 0.06389776357827476, "grad_norm": 1.5475027561187744, "learning_rate": 0.00019680511182108628, "loss": 0.283, "step": 20 }, { "epoch": 0.09584664536741214, "grad_norm": 6.500663757324219, "learning_rate": 0.0001952076677316294, "loss": 0.1913, "step": 30 }, { "epoch": 0.12779552715654952, "grad_norm": 11.902435302734375, "learning_rate": 0.00019361022364217253, "loss": 0.2797, "step": 40 }, { "epoch": 0.1597444089456869, "grad_norm": 0.10494810342788696, "learning_rate": 0.00019201277955271565, "loss": 0.1443, "step": 50 }, { "epoch": 0.19169329073482427, "grad_norm": 3.824563980102539, "learning_rate": 0.0001904153354632588, "loss": 0.1807, "step": 60 }, { "epoch": 0.22364217252396165, "grad_norm": 5.14697790145874, "learning_rate": 0.00018881789137380192, "loss": 0.2322, "step": 70 }, { "epoch": 0.25559105431309903, "grad_norm": 1.0805331468582153, "learning_rate": 0.00018722044728434505, "loss": 0.1309, "step": 80 }, { "epoch": 0.28753993610223644, "grad_norm": 0.8772349953651428, "learning_rate": 0.0001856230031948882, "loss": 0.2684, "step": 90 }, { "epoch": 0.3194888178913738, "grad_norm": 0.09347983449697495, "learning_rate": 0.00018402555910543132, "loss": 0.1118, "step": 100 }, { "epoch": 0.3194888178913738, "eval_accuracy": 0.9476, "eval_loss": 0.14858174324035645, "eval_model_preparation_time": 0.0051, "eval_runtime": 14.0146, "eval_samples_per_second": 178.385, "eval_steps_per_second": 22.334, "step": 100 }, { "epoch": 0.3514376996805112, "grad_norm": 0.8523539304733276, "learning_rate": 0.00018242811501597444, "loss": 0.1224, "step": 110 }, { "epoch": 0.38338658146964855, "grad_norm": 26.86109733581543, "learning_rate": 0.00018083067092651756, "loss": 0.1597, "step": 120 }, { "epoch": 0.41533546325878595, "grad_norm": 0.07714489102363586, "learning_rate": 0.00017923322683706071, "loss": 0.3385, "step": 130 }, { "epoch": 0.4472843450479233, "grad_norm": 0.46879705786705017, "learning_rate": 0.00017763578274760384, "loss": 0.1338, "step": 140 }, { "epoch": 0.4792332268370607, "grad_norm": 0.19195349514484406, "learning_rate": 0.000176038338658147, "loss": 0.1792, "step": 150 }, { "epoch": 0.5111821086261981, "grad_norm": 1.775911569595337, "learning_rate": 0.0001744408945686901, "loss": 0.142, "step": 160 }, { "epoch": 0.5431309904153354, "grad_norm": 3.7460222244262695, "learning_rate": 0.00017284345047923323, "loss": 0.2113, "step": 170 }, { "epoch": 0.5750798722044729, "grad_norm": 1.6586716175079346, "learning_rate": 0.00017124600638977638, "loss": 0.2379, "step": 180 }, { "epoch": 0.6070287539936102, "grad_norm": 0.37085050344467163, "learning_rate": 0.00016964856230031948, "loss": 0.0626, "step": 190 }, { "epoch": 0.6389776357827476, "grad_norm": 1.5378907918930054, "learning_rate": 0.00016805111821086263, "loss": 0.1112, "step": 200 }, { "epoch": 0.6389776357827476, "eval_accuracy": 0.9752, "eval_loss": 0.0701194703578949, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.3233, "eval_samples_per_second": 202.868, "eval_steps_per_second": 25.399, "step": 200 }, { "epoch": 0.670926517571885, "grad_norm": 0.3154517114162445, "learning_rate": 0.00016645367412140575, "loss": 0.051, "step": 210 }, { "epoch": 0.7028753993610224, "grad_norm": 0.05547064542770386, "learning_rate": 0.0001648562300319489, "loss": 0.0512, "step": 220 }, { "epoch": 0.7348242811501597, "grad_norm": 0.0428142175078392, "learning_rate": 0.00016325878594249202, "loss": 0.1203, "step": 230 }, { "epoch": 0.7667731629392971, "grad_norm": 0.07227767258882523, "learning_rate": 0.00016166134185303515, "loss": 0.1993, "step": 240 }, { "epoch": 0.7987220447284346, "grad_norm": 4.76859188079834, "learning_rate": 0.0001600638977635783, "loss": 0.0644, "step": 250 }, { "epoch": 0.8306709265175719, "grad_norm": 0.06700027734041214, "learning_rate": 0.00015846645367412142, "loss": 0.0898, "step": 260 }, { "epoch": 0.8626198083067093, "grad_norm": 0.14176583290100098, "learning_rate": 0.00015686900958466454, "loss": 0.1146, "step": 270 }, { "epoch": 0.8945686900958466, "grad_norm": 0.23704954981803894, "learning_rate": 0.00015527156549520767, "loss": 0.1933, "step": 280 }, { "epoch": 0.9265175718849841, "grad_norm": 0.32876431941986084, "learning_rate": 0.00015367412140575082, "loss": 0.1404, "step": 290 }, { "epoch": 0.9584664536741214, "grad_norm": 0.569324791431427, "learning_rate": 0.00015207667731629394, "loss": 0.0694, "step": 300 }, { "epoch": 0.9584664536741214, "eval_accuracy": 0.9808, "eval_loss": 0.06084931641817093, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.4406, "eval_samples_per_second": 200.956, "eval_steps_per_second": 25.16, "step": 300 }, { "epoch": 0.9904153354632588, "grad_norm": 2.446889877319336, "learning_rate": 0.00015047923322683706, "loss": 0.0315, "step": 310 }, { "epoch": 1.0223642172523961, "grad_norm": 0.13555611670017242, "learning_rate": 0.0001488817891373802, "loss": 0.0476, "step": 320 }, { "epoch": 1.0543130990415335, "grad_norm": 0.076418437063694, "learning_rate": 0.00014728434504792333, "loss": 0.0388, "step": 330 }, { "epoch": 1.0862619808306708, "grad_norm": 0.034216273576021194, "learning_rate": 0.00014568690095846646, "loss": 0.0514, "step": 340 }, { "epoch": 1.1182108626198084, "grad_norm": 0.8253958821296692, "learning_rate": 0.00014408945686900958, "loss": 0.0734, "step": 350 }, { "epoch": 1.1501597444089458, "grad_norm": 0.5345448851585388, "learning_rate": 0.00014249201277955273, "loss": 0.0159, "step": 360 }, { "epoch": 1.182108626198083, "grad_norm": 0.031145131215453148, "learning_rate": 0.00014089456869009585, "loss": 0.1411, "step": 370 }, { "epoch": 1.2140575079872205, "grad_norm": 2.8567800521850586, "learning_rate": 0.000139297124600639, "loss": 0.0221, "step": 380 }, { "epoch": 1.2460063897763578, "grad_norm": 0.06364869326353073, "learning_rate": 0.00013769968051118212, "loss": 0.0326, "step": 390 }, { "epoch": 1.2779552715654952, "grad_norm": 0.024139605462551117, "learning_rate": 0.00013610223642172525, "loss": 0.0048, "step": 400 }, { "epoch": 1.2779552715654952, "eval_accuracy": 0.9744, "eval_loss": 0.09166774898767471, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.5348, "eval_samples_per_second": 199.445, "eval_steps_per_second": 24.971, "step": 400 }, { "epoch": 1.3099041533546325, "grad_norm": 0.061418768018484116, "learning_rate": 0.00013450479233226837, "loss": 0.0336, "step": 410 }, { "epoch": 1.34185303514377, "grad_norm": 0.04032498970627785, "learning_rate": 0.0001329073482428115, "loss": 0.0272, "step": 420 }, { "epoch": 1.3738019169329074, "grad_norm": 0.05345893278717995, "learning_rate": 0.00013130990415335464, "loss": 0.0232, "step": 430 }, { "epoch": 1.4057507987220448, "grad_norm": 0.1362222582101822, "learning_rate": 0.00012971246006389777, "loss": 0.0234, "step": 440 }, { "epoch": 1.4376996805111821, "grad_norm": 2.3406131267547607, "learning_rate": 0.00012811501597444092, "loss": 0.0339, "step": 450 }, { "epoch": 1.4696485623003195, "grad_norm": 0.01726055145263672, "learning_rate": 0.00012651757188498404, "loss": 0.0329, "step": 460 }, { "epoch": 1.5015974440894568, "grad_norm": 0.021831806749105453, "learning_rate": 0.00012492012779552716, "loss": 0.0539, "step": 470 }, { "epoch": 1.5335463258785942, "grad_norm": 0.01614479534327984, "learning_rate": 0.00012332268370607028, "loss": 0.071, "step": 480 }, { "epoch": 1.5654952076677318, "grad_norm": 0.2092825472354889, "learning_rate": 0.00012172523961661342, "loss": 0.0503, "step": 490 }, { "epoch": 1.5974440894568689, "grad_norm": 0.02242388017475605, "learning_rate": 0.00012012779552715656, "loss": 0.036, "step": 500 }, { "epoch": 1.5974440894568689, "eval_accuracy": 0.9836, "eval_loss": 0.05519802123308182, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.5164, "eval_samples_per_second": 199.738, "eval_steps_per_second": 25.007, "step": 500 }, { "epoch": 1.6293929712460065, "grad_norm": 1.8705270290374756, "learning_rate": 0.00011853035143769968, "loss": 0.0691, "step": 510 }, { "epoch": 1.6613418530351438, "grad_norm": 0.12965114414691925, "learning_rate": 0.00011693290734824283, "loss": 0.0254, "step": 520 }, { "epoch": 1.6932907348242812, "grad_norm": 0.2740132212638855, "learning_rate": 0.00011533546325878595, "loss": 0.0508, "step": 530 }, { "epoch": 1.7252396166134185, "grad_norm": 0.01778402552008629, "learning_rate": 0.00011373801916932908, "loss": 0.0145, "step": 540 }, { "epoch": 1.7571884984025559, "grad_norm": 0.014663617126643658, "learning_rate": 0.00011214057507987221, "loss": 0.0315, "step": 550 }, { "epoch": 1.7891373801916934, "grad_norm": 0.015493140555918217, "learning_rate": 0.00011054313099041533, "loss": 0.1852, "step": 560 }, { "epoch": 1.8210862619808306, "grad_norm": 0.05500736087560654, "learning_rate": 0.00010894568690095847, "loss": 0.0292, "step": 570 }, { "epoch": 1.8530351437699681, "grad_norm": 0.014051590114831924, "learning_rate": 0.0001073482428115016, "loss": 0.0193, "step": 580 }, { "epoch": 1.8849840255591053, "grad_norm": 0.050858091562986374, "learning_rate": 0.00010575079872204474, "loss": 0.0825, "step": 590 }, { "epoch": 1.9169329073482428, "grad_norm": 0.08435127884149551, "learning_rate": 0.00010415335463258787, "loss": 0.0594, "step": 600 }, { "epoch": 1.9169329073482428, "eval_accuracy": 0.9808, "eval_loss": 0.05469144880771637, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.5149, "eval_samples_per_second": 199.761, "eval_steps_per_second": 25.01, "step": 600 }, { "epoch": 1.9488817891373802, "grad_norm": 0.016737263649702072, "learning_rate": 0.000102555910543131, "loss": 0.0463, "step": 610 }, { "epoch": 1.9808306709265175, "grad_norm": 0.018503542989492416, "learning_rate": 0.00010095846645367413, "loss": 0.0213, "step": 620 }, { "epoch": 2.012779552715655, "grad_norm": 0.11741246283054352, "learning_rate": 9.936102236421726e-05, "loss": 0.006, "step": 630 }, { "epoch": 2.0447284345047922, "grad_norm": 0.015154512599110603, "learning_rate": 9.77635782747604e-05, "loss": 0.003, "step": 640 }, { "epoch": 2.07667731629393, "grad_norm": 0.017609527334570885, "learning_rate": 9.616613418530351e-05, "loss": 0.0017, "step": 650 }, { "epoch": 2.108626198083067, "grad_norm": 0.012800313532352448, "learning_rate": 9.456869009584664e-05, "loss": 0.0042, "step": 660 }, { "epoch": 2.1405750798722045, "grad_norm": 1.593684434890747, "learning_rate": 9.297124600638978e-05, "loss": 0.0132, "step": 670 }, { "epoch": 2.1725239616613417, "grad_norm": 0.01244130078703165, "learning_rate": 9.137380191693292e-05, "loss": 0.0015, "step": 680 }, { "epoch": 2.2044728434504792, "grad_norm": 0.011427606455981731, "learning_rate": 8.977635782747604e-05, "loss": 0.0355, "step": 690 }, { "epoch": 2.236421725239617, "grad_norm": 0.017172975465655327, "learning_rate": 8.817891373801918e-05, "loss": 0.0115, "step": 700 }, { "epoch": 2.236421725239617, "eval_accuracy": 0.9844, "eval_loss": 0.0627300888299942, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.3578, "eval_samples_per_second": 202.301, "eval_steps_per_second": 25.328, "step": 700 }, { "epoch": 2.268370607028754, "grad_norm": 0.010955709032714367, "learning_rate": 8.658146964856231e-05, "loss": 0.0135, "step": 710 }, { "epoch": 2.3003194888178915, "grad_norm": 0.013627716340124607, "learning_rate": 8.498402555910544e-05, "loss": 0.0328, "step": 720 }, { "epoch": 2.3322683706070286, "grad_norm": 0.024327559396624565, "learning_rate": 8.338658146964856e-05, "loss": 0.0049, "step": 730 }, { "epoch": 2.364217252396166, "grad_norm": 0.012321406975388527, "learning_rate": 8.17891373801917e-05, "loss": 0.0096, "step": 740 }, { "epoch": 2.3961661341853033, "grad_norm": 0.010169831104576588, "learning_rate": 8.019169329073483e-05, "loss": 0.0014, "step": 750 }, { "epoch": 2.428115015974441, "grad_norm": 0.010039562359452248, "learning_rate": 7.859424920127795e-05, "loss": 0.0482, "step": 760 }, { "epoch": 2.460063897763578, "grad_norm": 0.010927636176347733, "learning_rate": 7.699680511182109e-05, "loss": 0.0149, "step": 770 }, { "epoch": 2.4920127795527156, "grad_norm": 0.010604580864310265, "learning_rate": 7.539936102236423e-05, "loss": 0.0028, "step": 780 }, { "epoch": 2.523961661341853, "grad_norm": 0.009957361966371536, "learning_rate": 7.380191693290735e-05, "loss": 0.0042, "step": 790 }, { "epoch": 2.5559105431309903, "grad_norm": 0.01096680574119091, "learning_rate": 7.220447284345049e-05, "loss": 0.0016, "step": 800 }, { "epoch": 2.5559105431309903, "eval_accuracy": 0.9936, "eval_loss": 0.02957286313176155, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.3992, "eval_samples_per_second": 201.627, "eval_steps_per_second": 25.244, "step": 800 }, { "epoch": 2.587859424920128, "grad_norm": 0.010381845757365227, "learning_rate": 7.060702875399361e-05, "loss": 0.0013, "step": 810 }, { "epoch": 2.619808306709265, "grad_norm": 0.03203699365258217, "learning_rate": 6.900958466453674e-05, "loss": 0.0209, "step": 820 }, { "epoch": 2.6517571884984026, "grad_norm": 0.009094555862247944, "learning_rate": 6.741214057507987e-05, "loss": 0.0011, "step": 830 }, { "epoch": 2.68370607028754, "grad_norm": 0.008862023241817951, "learning_rate": 6.5814696485623e-05, "loss": 0.0287, "step": 840 }, { "epoch": 2.7156549520766773, "grad_norm": 0.00914779119193554, "learning_rate": 6.421725239616614e-05, "loss": 0.0012, "step": 850 }, { "epoch": 2.747603833865815, "grad_norm": 0.009615966118872166, "learning_rate": 6.261980830670928e-05, "loss": 0.0011, "step": 860 }, { "epoch": 2.779552715654952, "grad_norm": 0.035701438784599304, "learning_rate": 6.1022364217252406e-05, "loss": 0.0089, "step": 870 }, { "epoch": 2.8115015974440896, "grad_norm": 0.012455513700842857, "learning_rate": 5.942492012779552e-05, "loss": 0.0011, "step": 880 }, { "epoch": 2.8434504792332267, "grad_norm": 0.009292890317738056, "learning_rate": 5.782747603833866e-05, "loss": 0.0091, "step": 890 }, { "epoch": 2.8753993610223643, "grad_norm": 0.008451340720057487, "learning_rate": 5.623003194888179e-05, "loss": 0.004, "step": 900 }, { "epoch": 2.8753993610223643, "eval_accuracy": 0.9916, "eval_loss": 0.032514333724975586, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.4929, "eval_samples_per_second": 200.114, "eval_steps_per_second": 25.054, "step": 900 }, { "epoch": 2.9073482428115014, "grad_norm": 0.7223944067955017, "learning_rate": 5.4632587859424925e-05, "loss": 0.0019, "step": 910 }, { "epoch": 2.939297124600639, "grad_norm": 0.007820590399205685, "learning_rate": 5.3035143769968054e-05, "loss": 0.001, "step": 920 }, { "epoch": 2.9712460063897765, "grad_norm": 0.011096654459834099, "learning_rate": 5.1437699680511184e-05, "loss": 0.0011, "step": 930 }, { "epoch": 3.0031948881789137, "grad_norm": 0.007654030807316303, "learning_rate": 4.984025559105431e-05, "loss": 0.0011, "step": 940 }, { "epoch": 3.0351437699680512, "grad_norm": 0.010331162251532078, "learning_rate": 4.824281150159744e-05, "loss": 0.0009, "step": 950 }, { "epoch": 3.0670926517571884, "grad_norm": 0.007492523640394211, "learning_rate": 4.664536741214058e-05, "loss": 0.0009, "step": 960 }, { "epoch": 3.099041533546326, "grad_norm": 0.007593994960188866, "learning_rate": 4.504792332268371e-05, "loss": 0.001, "step": 970 }, { "epoch": 3.130990415335463, "grad_norm": 0.007749509997665882, "learning_rate": 4.345047923322684e-05, "loss": 0.0009, "step": 980 }, { "epoch": 3.1629392971246006, "grad_norm": 0.007726697251200676, "learning_rate": 4.185303514376997e-05, "loss": 0.0009, "step": 990 }, { "epoch": 3.194888178913738, "grad_norm": 0.007175728678703308, "learning_rate": 4.0255591054313104e-05, "loss": 0.0009, "step": 1000 }, { "epoch": 3.194888178913738, "eval_accuracy": 0.9948, "eval_loss": 0.02237132005393505, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.3213, "eval_samples_per_second": 202.9, "eval_steps_per_second": 25.403, "step": 1000 }, { "epoch": 3.2268370607028753, "grad_norm": 0.007404172793030739, "learning_rate": 3.8658146964856234e-05, "loss": 0.0019, "step": 1010 }, { "epoch": 3.258785942492013, "grad_norm": 0.007146772928535938, "learning_rate": 3.7060702875399364e-05, "loss": 0.0009, "step": 1020 }, { "epoch": 3.29073482428115, "grad_norm": 0.02829296886920929, "learning_rate": 3.546325878594249e-05, "loss": 0.0009, "step": 1030 }, { "epoch": 3.3226837060702876, "grad_norm": 0.007179423235356808, "learning_rate": 3.386581469648562e-05, "loss": 0.0009, "step": 1040 }, { "epoch": 3.3546325878594248, "grad_norm": 0.0070446510799229145, "learning_rate": 3.226837060702875e-05, "loss": 0.0009, "step": 1050 }, { "epoch": 3.3865814696485623, "grad_norm": 0.007006002124398947, "learning_rate": 3.067092651757188e-05, "loss": 0.0009, "step": 1060 }, { "epoch": 3.4185303514377, "grad_norm": 0.006977152545005083, "learning_rate": 2.907348242811502e-05, "loss": 0.0008, "step": 1070 }, { "epoch": 3.450479233226837, "grad_norm": 0.0067046028561890125, "learning_rate": 2.747603833865815e-05, "loss": 0.0009, "step": 1080 }, { "epoch": 3.4824281150159746, "grad_norm": 0.0067406343296170235, "learning_rate": 2.5878594249201278e-05, "loss": 0.0012, "step": 1090 }, { "epoch": 3.5143769968051117, "grad_norm": 0.006645725108683109, "learning_rate": 2.428115015974441e-05, "loss": 0.0008, "step": 1100 }, { "epoch": 3.5143769968051117, "eval_accuracy": 0.9936, "eval_loss": 0.027039185166358948, "eval_model_preparation_time": 0.0051, "eval_runtime": 13.3116, "eval_samples_per_second": 187.807, "eval_steps_per_second": 23.513, "step": 1100 }, { "epoch": 3.5463258785942493, "grad_norm": 0.0066969566978514194, "learning_rate": 2.268370607028754e-05, "loss": 0.0008, "step": 1110 }, { "epoch": 3.5782747603833864, "grad_norm": 0.0066523440182209015, "learning_rate": 2.108626198083067e-05, "loss": 0.0008, "step": 1120 }, { "epoch": 3.610223642172524, "grad_norm": 0.0067514642141759396, "learning_rate": 1.9488817891373803e-05, "loss": 0.0008, "step": 1130 }, { "epoch": 3.642172523961661, "grad_norm": 0.0066239056177437305, "learning_rate": 1.7891373801916932e-05, "loss": 0.001, "step": 1140 }, { "epoch": 3.6741214057507987, "grad_norm": 0.006579084321856499, "learning_rate": 1.6293929712460065e-05, "loss": 0.0008, "step": 1150 }, { "epoch": 3.7060702875399363, "grad_norm": 0.006953445728868246, "learning_rate": 1.4696485623003195e-05, "loss": 0.0009, "step": 1160 }, { "epoch": 3.7380191693290734, "grad_norm": 0.006420870777219534, "learning_rate": 1.3099041533546328e-05, "loss": 0.0008, "step": 1170 }, { "epoch": 3.769968051118211, "grad_norm": 0.006650346331298351, "learning_rate": 1.1501597444089457e-05, "loss": 0.0008, "step": 1180 }, { "epoch": 3.801916932907348, "grad_norm": 0.006530832499265671, "learning_rate": 9.904153354632589e-06, "loss": 0.0018, "step": 1190 }, { "epoch": 3.8338658146964857, "grad_norm": 0.007318571675568819, "learning_rate": 8.306709265175718e-06, "loss": 0.0008, "step": 1200 }, { "epoch": 3.8338658146964857, "eval_accuracy": 0.994, "eval_loss": 0.02559490129351616, "eval_model_preparation_time": 0.0051, "eval_runtime": 12.5362, "eval_samples_per_second": 199.423, "eval_steps_per_second": 24.968, "step": 1200 }, { "epoch": 3.8658146964856233, "grad_norm": 0.007055082358419895, "learning_rate": 6.70926517571885e-06, "loss": 0.001, "step": 1210 }, { "epoch": 3.8977635782747604, "grad_norm": 0.006507499609142542, "learning_rate": 5.111821086261981e-06, "loss": 0.0008, "step": 1220 }, { "epoch": 3.9297124600638975, "grad_norm": 0.006634117104113102, "learning_rate": 3.5143769968051118e-06, "loss": 0.0007, "step": 1230 }, { "epoch": 3.961661341853035, "grad_norm": 0.006404239218682051, "learning_rate": 1.9169329073482426e-06, "loss": 0.0008, "step": 1240 }, { "epoch": 3.9936102236421727, "grad_norm": 0.006795979104936123, "learning_rate": 3.194888178913738e-07, "loss": 0.0008, "step": 1250 }, { "epoch": 4.0, "step": 1252, "total_flos": 1.54983979229184e+18, "train_loss": 0.055932876091605174, "train_runtime": 533.4054, "train_samples_per_second": 37.495, "train_steps_per_second": 2.347 } ], "logging_steps": 10, "max_steps": 1252, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.54983979229184e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }