{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2339, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04275331338178709, "grad_norm": 0.15714330971240997, "learning_rate": 6.666666666666667e-06, "loss": 2.323, "step": 100 }, { "epoch": 0.08550662676357418, "grad_norm": 0.22236719727516174, "learning_rate": 1.3333333333333333e-05, "loss": 2.2818, "step": 200 }, { "epoch": 0.12825994014536127, "grad_norm": 0.4057689309120178, "learning_rate": 2e-05, "loss": 2.1665, "step": 300 }, { "epoch": 0.17101325352714836, "grad_norm": 0.6240995526313782, "learning_rate": 1.9881538840448035e-05, "loss": 2.0323, "step": 400 }, { "epoch": 0.21376656690893545, "grad_norm": 1.0177165269851685, "learning_rate": 1.9528961971056615e-05, "loss": 1.9214, "step": 500 }, { "epoch": 0.25651988029072254, "grad_norm": 0.8750305771827698, "learning_rate": 1.8950622724781605e-05, "loss": 1.8745, "step": 600 }, { "epoch": 0.2992731936725096, "grad_norm": 1.0784953832626343, "learning_rate": 1.816022324916863e-05, "loss": 1.8557, "step": 700 }, { "epoch": 0.3420265070542967, "grad_norm": 1.0847351551055908, "learning_rate": 1.717648987189577e-05, "loss": 1.7955, "step": 800 }, { "epoch": 0.3847798204360838, "grad_norm": 1.1327402591705322, "learning_rate": 1.6022729432275364e-05, "loss": 1.7167, "step": 900 }, { "epoch": 0.4275331338178709, "grad_norm": 1.1066670417785645, "learning_rate": 1.4726277090211945e-05, "loss": 1.6959, "step": 1000 }, { "epoch": 0.47028644719965795, "grad_norm": 1.1382914781570435, "learning_rate": 1.3317848695254441e-05, "loss": 1.634, "step": 1100 }, { "epoch": 0.5130397605814451, "grad_norm": 1.3232320547103882, "learning_rate": 1.1830813059565374e-05, "loss": 1.6383, "step": 1200 }, { "epoch": 0.5557930739632322, "grad_norm": 1.2707828283309937, "learning_rate": 1.0300401376284509e-05, "loss": 1.662, "step": 1300 }, { "epoch": 0.5985463873450192, "grad_norm": 1.1388013362884521, "learning_rate": 8.762872513930507e-06, "loss": 1.6272, "step": 1400 }, { "epoch": 0.6412997007268063, "grad_norm": 1.339849829673767, "learning_rate": 7.254653962879187e-06, "loss": 1.6351, "step": 1500 }, { "epoch": 0.6840530141085934, "grad_norm": 1.3893671035766602, "learning_rate": 5.8114787868136125e-06, "loss": 1.5628, "step": 1600 }, { "epoch": 0.7268063274903805, "grad_norm": 1.172720193862915, "learning_rate": 4.4675390266924536e-06, "loss": 1.6123, "step": 1700 }, { "epoch": 0.7695596408721675, "grad_norm": 1.1993151903152466, "learning_rate": 3.2546756149860935e-06, "loss": 1.5979, "step": 1800 }, { "epoch": 0.8123129542539547, "grad_norm": 1.6297610998153687, "learning_rate": 2.2016239929203174e-06, "loss": 1.6089, "step": 1900 }, { "epoch": 0.8550662676357418, "grad_norm": 1.2422642707824707, "learning_rate": 1.3333333037387176e-06, "loss": 1.5803, "step": 2000 }, { "epoch": 0.8978195810175289, "grad_norm": 1.5311039686203003, "learning_rate": 6.703752918150241e-07, "loss": 1.6131, "step": 2100 }, { "epoch": 0.9405728943993159, "grad_norm": 1.3912514448165894, "learning_rate": 2.2845691211458298e-07, "loss": 1.5255, "step": 2200 }, { "epoch": 0.983326207781103, "grad_norm": 1.408026099205017, "learning_rate": 1.8048197374724852e-08, "loss": 1.5973, "step": 2300 }, { "epoch": 1.0, "step": 2339, "total_flos": 4.259518857216e+16, "train_loss": 1.7606283989458218, "train_runtime": 1228.9387, "train_samples_per_second": 3.806, "train_steps_per_second": 1.903 } ], "logging_steps": 100, "max_steps": 2339, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.259518857216e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }