{ "epoch": 1.0, "eval_logits/chosen": -1.1779704093933105, "eval_logits/rejected": -1.2511389255523682, "eval_logps/chosen": -335.2599792480469, "eval_logps/rejected": -317.35614013671875, "eval_loss": 0.6636306643486023, "eval_rewards/accuracies": 0.6746031641960144, "eval_rewards/chosen": 0.04071044921875, "eval_rewards/margins": 0.0732826218008995, "eval_rewards/rejected": -0.032572176307439804, "eval_runtime": 91.7551, "eval_samples": 2000, "eval_samples_per_second": 21.797, "eval_steps_per_second": 0.687, "total_flos": 0.0, "train_loss": 0.6732059223382543, "train_runtime": 6951.1203, "train_samples": 61134, "train_samples_per_second": 8.795, "train_steps_per_second": 0.034 }