{ "epoch": 0.8, "eval_logits/chosen": -0.5457379221916199, "eval_logits/rejected": -0.5458806753158569, "eval_logps/chosen": -317.97125244140625, "eval_logps/ref_response": -0.5449998378753662, "eval_logps/rejected": -331.12335205078125, "eval_loss": 0.6931814551353455, "eval_rewards/accuracies": 0.484375, "eval_rewards/chosen": -0.001830391469411552, "eval_rewards/margins": -0.000241550849750638, "eval_rewards/rejected": -0.0015888408524915576, "eval_runtime": 54.0012, "eval_samples": 240, "eval_samples_per_second": 4.444, "eval_steps_per_second": 0.148, "total_flos": 0.0, "train_loss": 0.693049430847168, "train_runtime": 130.1581, "train_samples": 240, "train_samples_per_second": 1.844, "train_steps_per_second": 0.023 }