{ "epoch": 0.9981298423724285, "eval_logits/chosen": -7.053029537200928, "eval_logits/rejected": -7.020767688751221, "eval_logps/chosen": -18186.599609375, "eval_logps/rejected": -17593.8046875, "eval_loss": 0.011397347785532475, "eval_rewards/accuracies": 0.4395161271095276, "eval_rewards/chosen": -180.98304748535156, "eval_rewards/margins": -5.912590503692627, "eval_rewards/rejected": -175.0704803466797, "eval_runtime": 174.1418, "eval_samples_per_second": 11.261, "eval_steps_per_second": 0.178 }