{"train_samples_per_second":16.806,"train_steps_per_second":1.054,"train/learning_rate":1.8271604938271607e-05,"_timestamp":1.7401074588811877e+09,"_wandb":{"runtime":1565},"train/loss":0.087,"_step":11,"eval/mse":0.16845503449440002,"_runtime":1565.794890728,"eval/samples_per_second":51.676,"train/global_step":1648,"eval/runtime":13.5845,"eval/steps_per_second":3.239,"train_runtime":1563.7518,"total_flos":1.4044502808235866e+16,"train_loss":0.24941422783055353,"train/epoch":8,"eval/loss":0.16845500469207764,"train/grad_norm":0.8355919122695923}