{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 63, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015873015873015872, "grad_norm": 8.639281115996736, "learning_rate": 2.6315789473684208e-08, "logits/chosen": -1.015625, "logits/rejected": -1.390625, "logps/chosen": -45.5, "logps/rejected": -80.5, "loss": 0.6914, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.15873015873015872, "grad_norm": 7.679101104209251, "learning_rate": 2.631578947368421e-07, "logits/chosen": -1.203125, "logits/rejected": -1.21875, "logps/chosen": -55.25, "logps/rejected": -57.5, "loss": 0.6937, "rewards/accuracies": 0.0694444477558136, "rewards/chosen": -1.6987323760986328e-06, "rewards/margins": -0.0013885498046875, "rewards/rejected": 0.00139617919921875, "step": 10 }, { "epoch": 0.31746031746031744, "grad_norm": 6.832273820892766, "learning_rate": 4.970588235294118e-07, "logits/chosen": -1.09375, "logits/rejected": -1.21875, "logps/chosen": -53.5, "logps/rejected": -56.75, "loss": 0.6895, "rewards/accuracies": 0.1875, "rewards/chosen": -0.01220703125, "rewards/margins": 0.0031280517578125, "rewards/rejected": -0.015380859375, "step": 20 }, { "epoch": 0.47619047619047616, "grad_norm": 7.271916625407268, "learning_rate": 4.676470588235294e-07, "logits/chosen": -1.1640625, "logits/rejected": -1.09375, "logps/chosen": -60.0, "logps/rejected": -56.75, "loss": 0.6819, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.04931640625, "rewards/margins": 0.033447265625, "rewards/rejected": -0.0830078125, "step": 30 }, { "epoch": 0.6349206349206349, "grad_norm": 9.143992879642836, "learning_rate": 4.38235294117647e-07, "logits/chosen": -1.3203125, "logits/rejected": -1.2578125, "logps/chosen": -63.25, "logps/rejected": -56.75, "loss": 0.6607, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.01531982421875, "rewards/margins": 0.08056640625, "rewards/rejected": -0.095703125, "step": 40 }, { "epoch": 0.7936507936507936, "grad_norm": 8.656910281802439, "learning_rate": 4.0882352941176465e-07, "logits/chosen": -1.171875, "logits/rejected": -1.2109375, "logps/chosen": -64.5, "logps/rejected": -55.25, "loss": 0.6601, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": 0.07958984375, "rewards/margins": 0.1142578125, "rewards/rejected": -0.03466796875, "step": 50 }, { "epoch": 0.9523809523809523, "grad_norm": 7.24400853291459, "learning_rate": 3.7941176470588235e-07, "logits/chosen": -1.25, "logits/rejected": -1.1953125, "logps/chosen": -62.0, "logps/rejected": -56.0, "loss": 0.6428, "rewards/accuracies": 0.375, "rewards/chosen": 0.099609375, "rewards/margins": 0.1171875, "rewards/rejected": -0.01708984375, "step": 60 }, { "epoch": 1.0, "eval_logits/chosen": -1.25, "eval_logits/rejected": -1.171875, "eval_logps/chosen": -65.0, "eval_logps/rejected": -57.25, "eval_loss": 0.6325781345367432, "eval_rewards/accuracies": 0.4107142984867096, "eval_rewards/chosen": 0.06103515625, "eval_rewards/margins": 0.1689453125, "eval_rewards/rejected": -0.10791015625, "eval_runtime": 12.383, "eval_samples_per_second": 16.151, "eval_steps_per_second": 0.565, "step": 63 } ], "logging_steps": 10, "max_steps": 189, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }