{ "epoch": 1.0, "eval_logits/chosen": -2.1620335578918457, "eval_logits/rejected": -2.062356948852539, "eval_logps/chosen": -206.584716796875, "eval_logps/rejected": -214.93492126464844, "eval_loss": 0.521207869052887, "eval_rewards/accuracies": 0.7405660152435303, "eval_rewards/chosen": -2.5398268699645996, "eval_rewards/margins": 0.9912916421890259, "eval_rewards/rejected": -3.531118631362915, "eval_runtime": 424.2832, "eval_samples": 418, "eval_samples_per_second": 0.985, "eval_steps_per_second": 0.125 }