{"train/loss": 3.5063, "train/grad_norm": 2.443696975708008, "train/learning_rate": 0.0, "train/epoch": 2.0, "train/global_step": 2000, "_timestamp": 1716386681.8685813, "_runtime": 9027.865963220596, "_step": 226, "train_runtime": 7028.5663, "train_samples_per_second": 0.285, "train_steps_per_second": 0.285, "total_flos": 1.046896491923424e+16, "train_loss": 3.840830388069153}