loss,grad_norm,learning_rate,epoch,step,eval_loss,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss 1.2534,0.788801253,4.00E-05,0.025,1,,,,,,,,, 1.6814,1.304793119,8.00E-05,0.05,2,,,,,,,,, 1.7566,1.277083755,0.00012,0.075,3,,,,,,,,, 1.3991,0.929054976,0.00016,0.1,4,,,,,,,,, 1.3387,0.8864097,0.0002,0.125,5,,,,,,,,, 1.055233836,7.1909,,0.125,5,1.055233836,7.1909,11.125,1.391,,,,, 0.8119,0.348697931,0.000192,0.15,6,,,,,,,,, 0.8837,0.462934077,0.000184,0.175,7,,,,,,,,, 0.6576,0.324428976,0.000176,0.2,8,,,,,,,,, 0.6484,0.224782541,0.000168,0.225,9,,,,,,,,, 0.7436,0.460241884,0.00016,0.25,10,,,,,,,,, 0.725510836,4.8682,,0.25,10,0.725510836,4.8682,16.433,2.054,,,,, 0.7571,0.258921564,0.000152,0.275,11,,,,,,,,, 0.9006,0.403984785,0.000144,0.3,12,,,,,,,,, 0.5278,0.19507888,0.000136,0.325,13,,,,,,,,, 0.5557,0.207217351,0.000128,0.35,14,,,,,,,,, 0.5042,0.140803739,0.00012,0.375,15,,,,,,,,, 0.647182524,4.8773,,0.375,15,0.647182524,4.8773,16.403,2.05,,,,, 0.6344,0.202345386,0.000112,0.4,16,,,,,,,,, 0.5956,0.282731414,0.000104,0.425,17,,,,,,,,, 0.6394,0.42618072,9.60E-05,0.45,18,,,,,,,,, 0.6881,0.231079757,8.80E-05,0.475,19,,,,,,,,, 0.5356,0.179146141,8.00E-05,0.5,20,,,,,,,,, 0.589806378,4.8632,,0.5,20,0.589806378,4.8632,16.45,2.056,,,,, 0.5028,0.184228301,7.20E-05,0.525,21,,,,,,,,, 0.4696,0.170585647,6.40E-05,0.55,22,,,,,,,,, 0.6429,0.298062533,5.60E-05,0.575,23,,,,,,,,, 0.5543,0.244517237,4.80E-05,0.6,24,,,,,,,,, 0.492,0.387174577,4.00E-05,0.625,25,,,,,,,,, 0.555610478,4.8657,,0.625,25,0.555610478,4.8657,16.442,2.055,,,,, 0.4687,0.348973632,3.20E-05,0.65,26,,,,,,,,, 0.5499,0.183923692,2.40E-05,0.675,27,,,,,,,,, 0.479,0.21796149,1.60E-05,0.7,28,,,,,,,,, 0.5323,0.287896246,8.00E-06,0.725,29,,,,,,,,, 0.5485,0.228732839,0,0.75,30,,,,,,,,, ,,,0.75,30,0.542289913,4.8679,16.434,2.054,,,,, ,,,0.75,30,,,,,116.3306,2.063,0.258,2.81939E+15,0.758431028