{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.042973785990545764, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008594757198109154, "grad_norm": 0.06708361208438873, "learning_rate": 4.999451708687114e-06, "logits/chosen": 14.524938583374023, "logits/rejected": 14.82593822479248, "logps/chosen": -0.31433865427970886, "logps/rejected": -0.32406437397003174, "loss": 0.9442, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.4715079367160797, "rewards/margins": 0.014588532969355583, "rewards/rejected": -0.48609647154808044, "step": 10 }, { "epoch": 0.017189514396218308, "grad_norm": 0.056814808398485184, "learning_rate": 4.997807075247147e-06, "logits/chosen": 14.309213638305664, "logits/rejected": 14.978128433227539, "logps/chosen": -0.31283506751060486, "logps/rejected": -0.3911947011947632, "loss": 0.928, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.46925264596939087, "rewards/margins": 0.1175394207239151, "rewards/rejected": -0.5867919921875, "step": 20 }, { "epoch": 0.02578427159432746, "grad_norm": 0.061199307441711426, "learning_rate": 4.9950668210706795e-06, "logits/chosen": 14.68384075164795, "logits/rejected": 15.338122367858887, "logps/chosen": -0.3007296621799469, "logps/rejected": -0.3204456865787506, "loss": 0.9439, "rewards/accuracies": 0.4375, "rewards/chosen": -0.45109447836875916, "rewards/margins": 0.029573997482657433, "rewards/rejected": -0.48066848516464233, "step": 30 }, { "epoch": 0.034379028792436615, "grad_norm": 0.08423774689435959, "learning_rate": 4.9912321481237616e-06, "logits/chosen": 14.39265251159668, "logits/rejected": 15.059102058410645, "logps/chosen": -0.28216058015823364, "logps/rejected": -0.33495840430259705, "loss": 0.9184, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.42324090003967285, "rewards/margins": 0.07919676601886749, "rewards/rejected": -0.5024376511573792, "step": 40 }, { "epoch": 0.042973785990545764, "grad_norm": 0.06052614375948906, "learning_rate": 4.986304738420684e-06, "logits/chosen": 14.383735656738281, "logits/rejected": 15.029413223266602, "logps/chosen": -0.27970507740974426, "logps/rejected": -0.33213528990745544, "loss": 0.9317, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4195576310157776, "rewards/margins": 0.07864536345005035, "rewards/rejected": -0.49820294976234436, "step": 50 }, { "epoch": 0.042973785990545764, "eval_logits/chosen": 14.424538612365723, "eval_logits/rejected": 15.006633758544922, "eval_logps/chosen": -0.2923925220966339, "eval_logps/rejected": -0.3531996011734009, "eval_loss": 0.9324354529380798, "eval_rewards/accuracies": 0.5052631497383118, "eval_rewards/chosen": -0.43858882784843445, "eval_rewards/margins": 0.09121060371398926, "eval_rewards/rejected": -0.5297994017601013, "eval_runtime": 26.3759, "eval_samples_per_second": 28.549, "eval_steps_per_second": 3.602, "step": 50 } ], "logging_steps": 10, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1358668938490675e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }