{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.35842293906810035, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0017921146953405018, "eval_loss": 3.68882155418396, "eval_runtime": 2.8556, "eval_samples_per_second": 82.293, "eval_steps_per_second": 41.322, "step": 1 }, { "epoch": 0.017921146953405017, "grad_norm": 4.795637607574463, "learning_rate": 0.0002, "loss": 14.3965, "step": 10 }, { "epoch": 0.035842293906810034, "grad_norm": 5.258082389831543, "learning_rate": 0.0002, "loss": 13.813, "step": 20 }, { "epoch": 0.053763440860215055, "grad_norm": 4.523959636688232, "learning_rate": 0.0002, "loss": 13.6853, "step": 30 }, { "epoch": 0.07168458781362007, "grad_norm": 4.891673564910889, "learning_rate": 0.0002, "loss": 13.4352, "step": 40 }, { "epoch": 0.08960573476702509, "grad_norm": 5.062295436859131, "learning_rate": 0.0002, "loss": 13.0299, "step": 50 }, { "epoch": 0.08960573476702509, "eval_loss": 3.3310461044311523, "eval_runtime": 2.9394, "eval_samples_per_second": 79.947, "eval_steps_per_second": 40.144, "step": 50 }, { "epoch": 0.10752688172043011, "grad_norm": 5.369076728820801, "learning_rate": 0.0002, "loss": 13.1517, "step": 60 }, { "epoch": 0.12544802867383512, "grad_norm": 5.329068183898926, "learning_rate": 0.0002, "loss": 13.13, "step": 70 }, { "epoch": 0.14336917562724014, "grad_norm": 5.421701431274414, "learning_rate": 0.0002, "loss": 13.0658, "step": 80 }, { "epoch": 0.16129032258064516, "grad_norm": 5.6926751136779785, "learning_rate": 0.0002, "loss": 13.0606, "step": 90 }, { "epoch": 0.17921146953405018, "grad_norm": 4.89196252822876, "learning_rate": 0.0002, "loss": 13.2831, "step": 100 }, { "epoch": 0.17921146953405018, "eval_loss": 3.270508289337158, "eval_runtime": 3.02, "eval_samples_per_second": 77.814, "eval_steps_per_second": 39.073, "step": 100 }, { "epoch": 0.1971326164874552, "grad_norm": 5.2668070793151855, "learning_rate": 0.0002, "loss": 13.1221, "step": 110 }, { "epoch": 0.21505376344086022, "grad_norm": 4.916939735412598, "learning_rate": 0.0002, "loss": 12.991, "step": 120 }, { "epoch": 0.23297491039426524, "grad_norm": 5.837742805480957, "learning_rate": 0.0002, "loss": 13.1578, "step": 130 }, { "epoch": 0.25089605734767023, "grad_norm": 5.586097240447998, "learning_rate": 0.0002, "loss": 12.6136, "step": 140 }, { "epoch": 0.26881720430107525, "grad_norm": 5.033875465393066, "learning_rate": 0.0002, "loss": 13.0117, "step": 150 }, { "epoch": 0.26881720430107525, "eval_loss": 3.2495903968811035, "eval_runtime": 2.9557, "eval_samples_per_second": 79.507, "eval_steps_per_second": 39.923, "step": 150 }, { "epoch": 0.2867383512544803, "grad_norm": 4.852773189544678, "learning_rate": 0.0002, "loss": 12.8406, "step": 160 }, { "epoch": 0.3046594982078853, "grad_norm": 5.590361595153809, "learning_rate": 0.0002, "loss": 12.821, "step": 170 }, { "epoch": 0.3225806451612903, "grad_norm": 6.373524188995361, "learning_rate": 0.0002, "loss": 12.7874, "step": 180 }, { "epoch": 0.34050179211469533, "grad_norm": 4.901456356048584, "learning_rate": 0.0002, "loss": 12.9965, "step": 190 }, { "epoch": 0.35842293906810035, "grad_norm": 5.205903053283691, "learning_rate": 0.0002, "loss": 12.7484, "step": 200 }, { "epoch": 0.35842293906810035, "eval_loss": 3.230069875717163, "eval_runtime": 3.0502, "eval_samples_per_second": 77.044, "eval_steps_per_second": 38.686, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1501393059840000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }