{ "epoch": 2.0, "eval_logits/chosen": -2.668743848800659, "eval_logits/rejected": -2.614588499069214, "eval_logps/chosen": -48.75492858886719, "eval_logps/rejected": -238.03585815429688, "eval_loss": 0.010777823626995087, "eval_rewards/accuracies": 0.9956896305084229, "eval_rewards/chosen": -1.6998504400253296, "eval_rewards/margins": 20.81882667541504, "eval_rewards/rejected": -22.5186767578125, "eval_runtime": 231.3578, "eval_samples": 905, "eval_samples_per_second": 3.912, "eval_steps_per_second": 0.125, "train_loss": 0.06552632141962543, "train_runtime": 3687.7187, "train_samples": 4570, "train_samples_per_second": 2.478, "train_steps_per_second": 0.078 }