{ "epoch": 0.9998638529611981, "eval_logits/chosen": 0.303020715713501, "eval_logits/rejected": 0.4951964318752289, "eval_logps/chosen": -607.077392578125, "eval_logps/rejected": -1515.866455078125, "eval_loss": 0.13239584863185883, "eval_rewards/accuracies": 0.9376528263092041, "eval_rewards/chosen": -2.673825979232788, "eval_rewards/margins": 9.565610885620117, "eval_rewards/rejected": -12.239436149597168, "eval_runtime": 3761.8245, "eval_samples": 4903, "eval_samples_per_second": 1.303, "eval_steps_per_second": 0.109 }