{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 400, "global_step": 31, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "abs_diff": 0.3515625, "all_logps_1": -716.58203125, "all_logps_1_values": -716.58203125, "all_logps_2": 368.328125, "all_logps_2_values": 368.328125, "epoch": 0.032, "grad_norm": 15.12199720138031, "learning_rate": 2.5e-07, "logits/chosen": -1.640625, "logits/rejected": -1.7109375, "logps/chosen": -2.390625, "logps/rejected": -2.359375, "loss": 1.8057, "original_losses": 1.8046875, "rewards/accuracies": 0.375, "rewards/chosen": -6.0, "rewards/margins": -0.06982421875, "rewards/rejected": -5.90625, "step": 1, "weight": 1.0 }, { "abs_diff": 0.310546875, "all_logps_1": -898.46630859375, "all_logps_1_values": -898.46630859375, "all_logps_2": 469.18359375, "all_logps_2_values": 469.18359375, "epoch": 0.16, "grad_norm": 9.1493804495839, "learning_rate": 9.966191788709714e-07, "logits/chosen": -1.5859375, "logits/rejected": -1.625, "logps/chosen": -2.171875, "logps/rejected": -2.171875, "loss": 1.7542, "original_losses": 1.7578125, "rewards/accuracies": 0.4609375, "rewards/chosen": -5.4375, "rewards/margins": -0.0084228515625, "rewards/rejected": -5.4375, "step": 5, "weight": 1.0 }, { "abs_diff": 0.498046875, "all_logps_1": -776.373046875, "all_logps_1_values": -776.373046875, "all_logps_2": 409.0, "all_logps_2_values": 409.0, "epoch": 0.32, "grad_norm": 14.347515525649953, "learning_rate": 8.83022221559489e-07, "logits/chosen": -1.4140625, "logits/rejected": -1.3984375, "logps/chosen": -2.4375, "logps/rejected": -2.40625, "loss": 2.0064, "original_losses": 2.0, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -6.09375, "rewards/margins": -0.0703125, "rewards/rejected": -6.03125, "step": 10, "weight": 1.0 }, { "abs_diff": 0.400390625, "all_logps_1": -866.23828125, "all_logps_1_values": -866.23828125, "all_logps_2": 444.09063720703125, "all_logps_2_values": 444.09063720703125, "epoch": 0.48, "grad_norm": 9.610798301744433, "learning_rate": 6.434016163555451e-07, "logits/chosen": -1.59375, "logits/rejected": -1.59375, "logps/chosen": -2.375, "logps/rejected": -2.34375, "loss": 1.9072, "original_losses": 1.90625, "rewards/accuracies": 0.375, "rewards/chosen": -5.9375, "rewards/margins": -0.06787109375, "rewards/rejected": -5.875, "step": 15, "weight": 1.0 }, { "abs_diff": 0.302734375, "all_logps_1": -795.837890625, "all_logps_1_values": -795.837890625, "all_logps_2": 418.5874938964844, "all_logps_2_values": 418.5874938964844, "epoch": 0.64, "grad_norm": 8.931859579637203, "learning_rate": 3.56598383644455e-07, "logits/chosen": -1.5859375, "logits/rejected": -1.5859375, "logps/chosen": -2.390625, "logps/rejected": -2.359375, "loss": 1.835, "original_losses": 1.8359375, "rewards/accuracies": 0.3375000059604645, "rewards/chosen": -5.96875, "rewards/margins": -0.09228515625, "rewards/rejected": -5.875, "step": 20, "weight": 1.0 }, { "abs_diff": 0.4609375, "all_logps_1": -784.2203369140625, "all_logps_1_values": -784.2203369140625, "all_logps_2": 412.63751220703125, "all_logps_2_values": 412.63751220703125, "epoch": 0.8, "grad_norm": 26.344861955814572, "learning_rate": 1.1697777844051104e-07, "logits/chosen": -1.4296875, "logits/rejected": -1.375, "logps/chosen": -2.375, "logps/rejected": -2.4375, "loss": 1.7904, "original_losses": 1.7890625, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -5.9375, "rewards/margins": 0.1787109375, "rewards/rejected": -6.09375, "step": 25, "weight": 1.0 }, { "abs_diff": 0.455078125, "all_logps_1": -790.1726684570312, "all_logps_1_values": -790.1726684570312, "all_logps_2": 412.5531311035156, "all_logps_2_values": 412.5531311035156, "epoch": 0.96, "grad_norm": 10.248900083694645, "learning_rate": 3.380821129028488e-09, "logits/chosen": -1.390625, "logits/rejected": -1.4375, "logps/chosen": -2.484375, "logps/rejected": -2.421875, "loss": 1.9701, "original_losses": 1.96875, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": -6.21875, "rewards/margins": -0.1416015625, "rewards/rejected": -6.0625, "step": 30, "weight": 1.0 }, { "epoch": 0.992, "step": 31, "total_flos": 0.0, "train_loss": 1.8891444052419355, "train_runtime": 316.7887, "train_samples_per_second": 3.157, "train_steps_per_second": 0.098 } ], "logging_steps": 5, "max_steps": 31, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }