zephyr-7b-dpo-lora / eval_results.json
Wenboz's picture
End of training
a6a71eb verified
raw
history blame
572 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": 0.5316108465194702,
"eval_logits/rejected": 0.9396071434020996,
"eval_logps/chosen": -447.05303955078125,
"eval_logps/rejected": -536.2667236328125,
"eval_loss": 0.49303239583969116,
"eval_rewards/accuracies": 0.7242063283920288,
"eval_rewards/chosen": -1.7955820560455322,
"eval_rewards/margins": 0.9434418678283691,
"eval_rewards/rejected": -2.7390236854553223,
"eval_runtime": 363.1163,
"eval_samples": 2000,
"eval_samples_per_second": 5.508,
"eval_steps_per_second": 0.173
}