{ "epoch": 9.777777777777779, "eval_logits/chosen": -0.34323057532310486, "eval_logits/rejected": -0.4047623574733734, "eval_logps/chosen": -0.07215116173028946, "eval_logps/rejected": -0.6233159303665161, "eval_loss": 0.09428545832633972, "eval_odds_ratio_loss": 0.8242944478988647, "eval_rewards/accuracies": 0.8833333253860474, "eval_rewards/chosen": -0.0072151171043515205, "eval_rewards/margins": 0.05511648207902908, "eval_rewards/rejected": -0.06233159825205803, "eval_runtime": 2.331, "eval_samples_per_second": 25.74, "eval_sft_loss": 0.01185599621385336, "eval_steps_per_second": 12.87 }