{ "epoch": 1.0, "eval_logits/chosen": -2.5093116760253906, "eval_logits/rejected": -2.4554219245910645, "eval_logps/chosen": -253.41494750976562, "eval_logps/rejected": -298.073486328125, "eval_loss": 0.5370525121688843, "eval_rewards/accuracies": 0.7264150977134705, "eval_rewards/chosen": -0.9334509372711182, "eval_rewards/margins": 0.7120789289474487, "eval_rewards/rejected": -1.6455297470092773, "eval_runtime": 287.3881, "eval_samples": 418, "eval_samples_per_second": 1.454, "eval_steps_per_second": 0.184, "train_loss": 0.5672297686573089, "train_runtime": 12946.9794, "train_samples": 8130, "train_samples_per_second": 0.628, "train_steps_per_second": 0.078 }