{ "epoch": 3.0, "eval_logits/chosen": -2.107180118560791, "eval_logits/rejected": -2.047370672225952, "eval_logps/chosen": -264.3012390136719, "eval_logps/rejected": -214.9073944091797, "eval_loss": 0.500950038433075, "eval_rewards/accuracies": 0.8583333492279053, "eval_rewards/chosen": 1.1422334909439087, "eval_rewards/margins": 2.9307103157043457, "eval_rewards/rejected": -1.7884769439697266, "eval_runtime": 125.5521, "eval_samples_per_second": 15.133, "eval_steps_per_second": 0.239 }