{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8888888888888888, "eval_steps": 4, "global_step": 7, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_completion_length": 362.30074795809657, "eval_kl": 0.0, "eval_loss": -9.876328113023192e-05, "eval_reward": 0.665381520986557, "eval_reward_std": 0.4824826947667382, "eval_rewards/accuracy_reward": 0.13372565534981815, "eval_rewards/format_reward": 0.5316558669913899, "eval_runtime": 333.2158, "eval_samples_per_second": 2.101, "eval_steps_per_second": 0.012, "step": 0 }, { "epoch": 0.5079365079365079, "eval_completion_length": 217.30788110803675, "eval_kl": 28.520580150462962, "eval_loss": 0.09147126972675323, "eval_reward": 0.923280468693486, "eval_reward_std": 0.3771378709762185, "eval_rewards/accuracy_reward": 0.13583003308762004, "eval_rewards/format_reward": 0.7874504283622459, "eval_runtime": 251.0843, "eval_samples_per_second": 2.788, "eval_steps_per_second": 0.016, "step": 4 }, { "completion_length": 208.86775398254395, "epoch": 0.6349206349206349, "grad_norm": 73.1821060180664, "kl": 1.2861328125, "learning_rate": 5.000000000000003e-06, "loss": 1.4987, "reward": 0.9737723916769028, "reward_std": 0.3762528672814369, "rewards/accuracy_reward": 0.12611607741564512, "rewards/format_reward": 0.8476562947034836, "step": 5 }, { "completion_length": 188.7966046333313, "epoch": 0.8888888888888888, "kl": 0.51080322265625, "reward": 0.9969308339059353, "reward_std": 0.3559337202459574, "rewards/accuracy_reward": 0.1283482201397419, "rewards/format_reward": 0.8685826286673546, "step": 7, "total_flos": 0.0, "train_loss": 1.0761486015149526, "train_runtime": 1434.8598, "train_samples_per_second": 1.394, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 7, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }