{ "epoch": 11.984422327996539, "eval_logits/chosen": -0.263671875, "eval_logits/rejected": -0.1396484375, "eval_logps/chosen": -280.0, "eval_logps/rejected": -364.0, "eval_loss": 0.18390525877475739, "eval_rewards/accuracies": 0.9897540807723999, "eval_rewards/chosen": 3.3125, "eval_rewards/margins": 12.5, "eval_rewards/rejected": -9.1875, "eval_runtime": 105.6283, "eval_samples_per_second": 36.846, "eval_steps_per_second": 1.155 }