{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9936305732484076, "eval_steps": 100, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012738853503184714, "grad_norm": 6.01347209816415, "learning_rate": 6.25e-08, "logits/chosen": -0.724609375, "logits/rejected": -0.806640625, "logps/chosen": -622.0, "logps/rejected": -564.0, "loss": 0.6914, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.12738853503184713, "grad_norm": 5.639556156867892, "learning_rate": 4.989935734988097e-07, "logits/chosen": -0.6623263955116272, "logits/rejected": -0.7241753339767456, "logps/chosen": -654.2222290039062, "logps/rejected": -562.888916015625, "loss": 0.6894, "rewards/accuracies": 0.2109375, "rewards/chosen": 0.002502229530364275, "rewards/margins": 0.00495868269354105, "rewards/rejected": -0.0024539099540561438, "step": 10 }, { "epoch": 0.25477707006369427, "grad_norm": 6.181167294159666, "learning_rate": 4.646121984004665e-07, "logits/chosen": -0.642578125, "logits/rejected": -0.743359386920929, "logps/chosen": -646.5999755859375, "logps/rejected": -580.0, "loss": 0.6494, "rewards/accuracies": 0.9046875238418579, "rewards/chosen": 0.02710266038775444, "rewards/margins": 0.08971557766199112, "rewards/rejected": -0.06258849799633026, "step": 20 }, { "epoch": 0.3821656050955414, "grad_norm": 5.776979981865101, "learning_rate": 3.877242453630256e-07, "logits/chosen": -0.642382800579071, "logits/rejected": -0.7914062738418579, "logps/chosen": -643.2000122070312, "logps/rejected": -598.7999877929688, "loss": 0.5537, "rewards/accuracies": 0.98828125, "rewards/chosen": -0.05901794508099556, "rewards/margins": 0.3102050721645355, "rewards/rejected": -0.36909180879592896, "step": 30 }, { "epoch": 0.5095541401273885, "grad_norm": 9.860246173361963, "learning_rate": 2.8355831645441387e-07, "logits/chosen": -0.701953113079071, "logits/rejected": -0.8720703125, "logps/chosen": -686.5999755859375, "logps/rejected": -666.7999877929688, "loss": 0.447, "rewards/accuracies": 0.9820312261581421, "rewards/chosen": -0.3854003846645355, "rewards/margins": 0.611523449420929, "rewards/rejected": -0.9966796636581421, "step": 40 }, { "epoch": 0.6369426751592356, "grad_norm": 8.511743565135482, "learning_rate": 1.7274575140626315e-07, "logits/chosen": -0.8095703125, "logits/rejected": -1.0634765625, "logps/chosen": -814.4000244140625, "logps/rejected": -885.7999877929688, "loss": 0.266, "rewards/accuracies": 0.953125, "rewards/chosen": -1.593359351158142, "rewards/margins": 1.6101562976837158, "rewards/rejected": -3.203906297683716, "step": 50 }, { "epoch": 0.7643312101910829, "grad_norm": 10.86178580728423, "learning_rate": 7.723433775328384e-08, "logits/chosen": -0.8453124761581421, "logits/rejected": -1.1669921875, "logps/chosen": -947.2000122070312, "logps/rejected": -1193.5999755859375, "loss": 0.1695, "rewards/accuracies": 0.9468749761581421, "rewards/chosen": -3.0523438453674316, "rewards/margins": 3.239062547683716, "rewards/rejected": -6.293749809265137, "step": 60 }, { "epoch": 0.89171974522293, "grad_norm": 7.133119735950532, "learning_rate": 1.5941282340065697e-08, "logits/chosen": -0.8511718511581421, "logits/rejected": -1.142578125, "logps/chosen": -964.4000244140625, "logps/rejected": -1271.5999755859375, "loss": 0.1281, "rewards/accuracies": 0.957812488079071, "rewards/chosen": -3.160937547683716, "rewards/margins": 4.007031440734863, "rewards/rejected": -7.168749809265137, "step": 70 }, { "epoch": 0.9936305732484076, "step": 78, "total_flos": 0.0, "train_loss": 0.386262208987505, "train_runtime": 654.7511, "train_samples_per_second": 15.271, "train_steps_per_second": 0.119 } ], "logging_steps": 10, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }