{ "epoch": 0.9989071038251366, "eval_logits/chosen": -1.4181982278823853, "eval_logits/rejected": -1.3836873769760132, "eval_logps/chosen": -1.1345627307891846, "eval_logps/rejected": -1.5455946922302246, "eval_loss": 2.404977321624756, "eval_rewards/accuracies": 0.8102409839630127, "eval_rewards/chosen": -11.345626831054688, "eval_rewards/margins": 4.110320091247559, "eval_rewards/rejected": -15.45594596862793, "eval_runtime": 33.5188, "eval_samples": 1318, "eval_samples_per_second": 39.321, "eval_steps_per_second": 2.476, "total_flos": 0.0, "train_loss": 3.252214796955267, "train_runtime": 5969.5229, "train_samples": 58558, "train_samples_per_second": 9.809, "train_steps_per_second": 0.077 }