{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01282051282051282, "grad_norm": 85.24139006970277, "learning_rate": 6.25e-08, "logits/generated": -1.035823106765747, "logits/real": -0.7845579385757446, "logps/generated": -221.43978881835938, "logps/real": -290.43585205078125, "loss": 0.9091, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.1282051282051282, "grad_norm": 57.78188477455482, "learning_rate": 4.857142857142857e-07, "logits/generated": -1.00705885887146, "logits/real": -0.9160953164100647, "logps/generated": -217.96315002441406, "logps/real": -228.15171813964844, "loss": 0.9081, "rewards/accuracies": 0.5, "rewards/generated": 0.11379219591617584, "rewards/margins": 0.00582469254732132, "rewards/real": 0.11961688101291656, "step": 10 }, { "epoch": 0.2564102564102564, "grad_norm": 55.19183682235091, "learning_rate": 4.142857142857143e-07, "logits/generated": -1.052150011062622, "logits/real": -1.0221259593963623, "logps/generated": -200.09579467773438, "logps/real": -193.20050048828125, "loss": 0.88, "rewards/accuracies": 0.5625, "rewards/generated": 0.9856117367744446, "rewards/margins": 0.019290633499622345, "rewards/real": 1.0049023628234863, "step": 20 }, { "epoch": 0.38461538461538464, "grad_norm": 70.19010436249343, "learning_rate": 3.4285714285714286e-07, "logits/generated": -1.1168638467788696, "logits/real": -1.0286533832550049, "logps/generated": -196.82545471191406, "logps/real": -194.75421142578125, "loss": 0.8597, "rewards/accuracies": 0.637499988079071, "rewards/generated": 1.657329797744751, "rewards/margins": 0.09445972740650177, "rewards/real": 1.7517893314361572, "step": 30 }, { "epoch": 0.5128205128205128, "grad_norm": 59.8434152735345, "learning_rate": 2.714285714285714e-07, "logits/generated": -1.0656118392944336, "logits/real": -0.9198936223983765, "logps/generated": -174.37677001953125, "logps/real": -206.28683471679688, "loss": 0.8633, "rewards/accuracies": 0.5249999761581421, "rewards/generated": 2.180746555328369, "rewards/margins": 0.10865961015224457, "rewards/real": 2.2894062995910645, "step": 40 }, { "epoch": 0.6410256410256411, "grad_norm": 62.50654161583806, "learning_rate": 2e-07, "logits/generated": -1.1591944694519043, "logits/real": -0.9199365377426147, "logps/generated": -167.31390380859375, "logps/real": -196.71131896972656, "loss": 0.8212, "rewards/accuracies": 0.762499988079071, "rewards/generated": 2.3674254417419434, "rewards/margins": 0.24083653092384338, "rewards/real": 2.608261823654175, "step": 50 }, { "epoch": 0.7692307692307693, "grad_norm": 56.97503359063291, "learning_rate": 1.2857142857142855e-07, "logits/generated": -1.0447438955307007, "logits/real": -0.9084192514419556, "logps/generated": -174.6784210205078, "logps/real": -183.24168395996094, "loss": 0.8568, "rewards/accuracies": 0.637499988079071, "rewards/generated": 2.725738525390625, "rewards/margins": 0.11709457635879517, "rewards/real": 2.8428330421447754, "step": 60 }, { "epoch": 0.8974358974358975, "grad_norm": 70.79977219793189, "learning_rate": 5.714285714285714e-08, "logits/generated": -1.0091984272003174, "logits/real": -0.8863734006881714, "logps/generated": -181.19216918945312, "logps/real": -198.0881805419922, "loss": 0.85, "rewards/accuracies": 0.6000000238418579, "rewards/generated": 2.8773930072784424, "rewards/margins": 0.09863928705453873, "rewards/real": 2.976032257080078, "step": 70 }, { "epoch": 1.0, "step": 78, "total_flos": 0.0, "train_loss": 0.8588812213677627, "train_runtime": 1105.0645, "train_samples_per_second": 2.248, "train_steps_per_second": 0.071 } ], "logging_steps": 10, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }