{ "epoch": 0.9959925193694897, "eval_logits/chosen": -2.511686325073242, "eval_logits/rejected": -2.44095516204834, "eval_logps/chosen": -0.6983144879341125, "eval_logps/ref_chosen": -0.7951558232307434, "eval_logps/ref_rejected": -0.8243172764778137, "eval_logps/rejected": -0.8109735250473022, "eval_loss": 0.4769956171512604, "eval_rewards/accuracies": 0.6814516186714172, "eval_rewards/chosen": 0.24210312962532043, "eval_rewards/margins": 0.20874378085136414, "eval_rewards/rejected": 0.033359345048666, "eval_runtime": 145.1612, "eval_samples": 1961, "eval_samples_per_second": 13.509, "eval_steps_per_second": 0.427, "total_flos": 0.0, "train_loss": 0.4860667634931245, "train_runtime": 16529.3176, "train_samples": 59876, "train_samples_per_second": 3.622, "train_steps_per_second": 0.014 }