{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9953917050691244, "eval_steps": 500, "global_step": 27, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03686635944700461, "grad_norm": 0.2873728095047169, "learning_rate": 0.0, "loss": 1.4734, "step": 1 }, { "epoch": 0.07373271889400922, "grad_norm": 0.27419596643661837, "learning_rate": 0.0003, "loss": 1.279, "step": 2 }, { "epoch": 0.11059907834101383, "grad_norm": 0.3351799319426796, "learning_rate": 0.0003, "loss": 1.4582, "step": 3 }, { "epoch": 0.14746543778801843, "grad_norm": 0.2789893810816952, "learning_rate": 0.0003, "loss": 1.1216, "step": 4 }, { "epoch": 0.18433179723502305, "grad_norm": 0.14190144952821382, "learning_rate": 0.0003, "loss": 1.0121, "step": 5 }, { "epoch": 0.22119815668202766, "grad_norm": 0.1542021738719797, "learning_rate": 0.0003, "loss": 1.0723, "step": 6 }, { "epoch": 0.25806451612903225, "grad_norm": 0.17476255091645262, "learning_rate": 0.0003, "loss": 0.8962, "step": 7 }, { "epoch": 0.29493087557603687, "grad_norm": 0.30183805297227384, "learning_rate": 0.0003, "loss": 0.9577, "step": 8 }, { "epoch": 0.3317972350230415, "grad_norm": 0.2945209326852545, "learning_rate": 0.0003, "loss": 0.9143, "step": 9 }, { "epoch": 0.3686635944700461, "grad_norm": 0.21480966699076806, "learning_rate": 0.0003, "loss": 0.7952, "step": 10 }, { "epoch": 0.4055299539170507, "grad_norm": 0.18078986894945484, "learning_rate": 0.0003, "loss": 0.8234, "step": 11 }, { "epoch": 0.4423963133640553, "grad_norm": 0.15453708977718567, "learning_rate": 0.0003, "loss": 0.7589, "step": 12 }, { "epoch": 0.4792626728110599, "grad_norm": 0.1631172234239537, "learning_rate": 0.0003, "loss": 0.7419, "step": 13 }, { "epoch": 0.5161290322580645, "grad_norm": 0.09781085387100458, "learning_rate": 0.0003, "loss": 0.7239, "step": 14 }, { "epoch": 0.5529953917050692, "grad_norm": 0.09897379010199117, "learning_rate": 0.0003, "loss": 0.7673, "step": 15 }, { "epoch": 0.5898617511520737, "grad_norm": 0.11558640849854486, "learning_rate": 0.0003, "loss": 0.7533, "step": 16 }, { "epoch": 0.6267281105990783, "grad_norm": 0.11345769354838794, "learning_rate": 0.0003, "loss": 0.7581, "step": 17 }, { "epoch": 0.663594470046083, "grad_norm": 0.1013501193678853, "learning_rate": 0.0003, "loss": 0.7224, "step": 18 }, { "epoch": 0.7004608294930875, "grad_norm": 0.09930580785134363, "learning_rate": 0.0003, "loss": 0.7597, "step": 19 }, { "epoch": 0.7373271889400922, "grad_norm": 0.10206714996240562, "learning_rate": 0.0003, "loss": 0.704, "step": 20 }, { "epoch": 0.7741935483870968, "grad_norm": 0.10775281367207125, "learning_rate": 0.0003, "loss": 0.6639, "step": 21 }, { "epoch": 0.8110599078341014, "grad_norm": 0.12015377273414085, "learning_rate": 0.0003, "loss": 0.7494, "step": 22 }, { "epoch": 0.847926267281106, "grad_norm": 0.08770642908913276, "learning_rate": 0.0003, "loss": 0.7115, "step": 23 }, { "epoch": 0.8847926267281107, "grad_norm": 0.135245894998221, "learning_rate": 0.0003, "loss": 0.7169, "step": 24 }, { "epoch": 0.9216589861751152, "grad_norm": 0.0993611544536447, "learning_rate": 0.0003, "loss": 0.6667, "step": 25 }, { "epoch": 0.9585253456221198, "grad_norm": 0.09795283307056235, "learning_rate": 0.0003, "loss": 0.6859, "step": 26 }, { "epoch": 0.9953917050691244, "grad_norm": 0.10408097730031732, "learning_rate": 0.0003, "loss": 0.6844, "step": 27 }, { "epoch": 0.9953917050691244, "step": 27, "total_flos": 5728527974400.0, "train_loss": 0.8656142420238919, "train_runtime": 448.8389, "train_samples_per_second": 1.934, "train_steps_per_second": 0.06 } ], "logging_steps": 1.0, "max_steps": 27, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5728527974400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }