{ "epoch": 3.0, "eval_logits/chosen": -2.0344715118408203, "eval_logits/rejected": -1.9804012775421143, "eval_logps/chosen": -265.97662353515625, "eval_logps/rejected": -232.47203063964844, "eval_loss": 0.5272051095962524, "eval_rewards/accuracies": 0.734000027179718, "eval_rewards/chosen": -0.1408846527338028, "eval_rewards/margins": 0.7409887909889221, "eval_rewards/rejected": -0.8818734884262085, "eval_runtime": 1089.9901, "eval_samples": 2000, "eval_samples_per_second": 1.835, "eval_steps_per_second": 0.459 }