AmberYifan's picture
Training in progress, step 124, checkpoint
cd5ba1b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3968,
"eval_steps": 31,
"global_step": 124,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0032,
"grad_norm": 309.04591823661724,
"learning_rate": 5.3191489361702125e-09,
"logits/generated": -3.1874351501464844,
"logits/real": -2.811344623565674,
"logps/generated": -277.39678955078125,
"logps/real": -164.29153442382812,
"loss": 0.8248,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.032,
"grad_norm": 273.888694984639,
"learning_rate": 5.3191489361702123e-08,
"logits/generated": -2.979994058609009,
"logits/real": -2.536571979522705,
"logps/generated": -242.26495361328125,
"logps/real": -126.36863708496094,
"loss": 0.7579,
"rewards/accuracies": 0.8055555820465088,
"rewards/generated": -0.08589766174554825,
"rewards/margins": 0.1557125300168991,
"rewards/real": 0.06981485337018967,
"step": 10
},
{
"epoch": 0.064,
"grad_norm": 6.278552838520857,
"learning_rate": 1.0638297872340425e-07,
"logits/generated": -3.1302971839904785,
"logits/real": -2.4443600177764893,
"logps/generated": -277.353759765625,
"logps/real": -123.6572494506836,
"loss": 0.2741,
"rewards/accuracies": 1.0,
"rewards/generated": -1.92562997341156,
"rewards/margins": 3.013349771499634,
"rewards/real": 1.0877193212509155,
"step": 20
},
{
"epoch": 0.096,
"grad_norm": 2.184986357146384,
"learning_rate": 1.5957446808510638e-07,
"logits/generated": -2.7463412284851074,
"logits/real": -2.1607251167297363,
"logps/generated": -311.76275634765625,
"logps/real": -105.89111328125,
"loss": 0.1066,
"rewards/accuracies": 1.0,
"rewards/generated": -5.328858375549316,
"rewards/margins": 8.681567192077637,
"rewards/real": 3.3527092933654785,
"step": 30
},
{
"epoch": 0.0992,
"eval_logits/generated": -2.7707955837249756,
"eval_logits/real": -2.1341326236724854,
"eval_logps/generated": -309.7686767578125,
"eval_logps/real": -99.30474090576172,
"eval_loss": 0.10212492197751999,
"eval_rewards/accuracies": 1.0,
"eval_rewards/generated": -5.6528496742248535,
"eval_rewards/margins": 9.39120101928711,
"eval_rewards/real": 3.7383503913879395,
"eval_runtime": 52.8853,
"eval_samples_per_second": 3.782,
"eval_steps_per_second": 0.246,
"step": 31
},
{
"epoch": 0.128,
"grad_norm": 1.4683533798363122,
"learning_rate": 2.127659574468085e-07,
"logits/generated": -2.6473488807678223,
"logits/real": -2.092941999435425,
"logps/generated": -328.85003662109375,
"logps/real": -108.34922790527344,
"loss": 0.108,
"rewards/accuracies": 1.0,
"rewards/generated": -7.4022650718688965,
"rewards/margins": 11.075445175170898,
"rewards/real": 3.6731820106506348,
"step": 40
},
{
"epoch": 0.16,
"grad_norm": 1.3833819249775736,
"learning_rate": 2.659574468085106e-07,
"logits/generated": -2.673710346221924,
"logits/real": -1.7863633632659912,
"logps/generated": -341.03179931640625,
"logps/real": -99.21113586425781,
"loss": 0.0938,
"rewards/accuracies": 1.0,
"rewards/generated": -8.64827823638916,
"rewards/margins": 12.339118003845215,
"rewards/real": 3.6908397674560547,
"step": 50
},
{
"epoch": 0.192,
"grad_norm": 1.4745541409589713,
"learning_rate": 3.1914893617021275e-07,
"logits/generated": -2.457066297531128,
"logits/real": -1.9866771697998047,
"logps/generated": -337.7644958496094,
"logps/real": -100.87881469726562,
"loss": 0.0953,
"rewards/accuracies": 1.0,
"rewards/generated": -9.72453784942627,
"rewards/margins": 13.710101127624512,
"rewards/real": 3.9855639934539795,
"step": 60
},
{
"epoch": 0.1984,
"eval_logits/generated": -2.7848963737487793,
"eval_logits/real": -2.406802177429199,
"eval_logps/generated": -329.84674072265625,
"eval_logps/real": -96.66284942626953,
"eval_loss": 0.09936456382274628,
"eval_rewards/accuracies": 1.0,
"eval_rewards/generated": -7.660656452178955,
"eval_rewards/margins": 11.66319751739502,
"eval_rewards/real": 4.002540111541748,
"eval_runtime": 57.5754,
"eval_samples_per_second": 3.474,
"eval_steps_per_second": 0.226,
"step": 62
},
{
"epoch": 0.224,
"grad_norm": 1.323646793674921,
"learning_rate": 3.7234042553191484e-07,
"logits/generated": -2.6429388523101807,
"logits/real": -2.2720017433166504,
"logps/generated": -340.06341552734375,
"logps/real": -93.11213684082031,
"loss": 0.099,
"rewards/accuracies": 1.0,
"rewards/generated": -8.733041763305664,
"rewards/margins": 12.913464546203613,
"rewards/real": 4.180423259735107,
"step": 70
},
{
"epoch": 0.256,
"grad_norm": 1.5638794899127106,
"learning_rate": 4.25531914893617e-07,
"logits/generated": -2.6645286083221436,
"logits/real": -2.423600435256958,
"logps/generated": -337.8456726074219,
"logps/real": -98.58467102050781,
"loss": 0.0932,
"rewards/accuracies": 1.0,
"rewards/generated": -9.53776741027832,
"rewards/margins": 13.911605834960938,
"rewards/real": 4.373837947845459,
"step": 80
},
{
"epoch": 0.288,
"grad_norm": 1.3991564683587894,
"learning_rate": 4.787234042553192e-07,
"logits/generated": -2.6595396995544434,
"logits/real": -2.3267276287078857,
"logps/generated": -356.4102478027344,
"logps/real": -105.5425033569336,
"loss": 0.0963,
"rewards/accuracies": 1.0,
"rewards/generated": -9.849004745483398,
"rewards/margins": 14.890844345092773,
"rewards/real": 5.041840076446533,
"step": 90
},
{
"epoch": 0.2976,
"eval_logits/generated": -2.7958528995513916,
"eval_logits/real": -2.4889986515045166,
"eval_logps/generated": -360.5955505371094,
"eval_logps/real": -97.25410461425781,
"eval_loss": 0.09956898540258408,
"eval_rewards/accuracies": 1.0,
"eval_rewards/generated": -10.735539436340332,
"eval_rewards/margins": 14.678956985473633,
"eval_rewards/real": 3.943415403366089,
"eval_runtime": 58.0625,
"eval_samples_per_second": 3.445,
"eval_steps_per_second": 0.224,
"step": 93
},
{
"epoch": 0.32,
"grad_norm": 1.2445906541027463,
"learning_rate": 4.96437054631829e-07,
"logits/generated": -2.7101943492889404,
"logits/real": -2.502084493637085,
"logps/generated": -350.31158447265625,
"logps/real": -102.1003646850586,
"loss": 0.0909,
"rewards/accuracies": 1.0,
"rewards/generated": -10.010136604309082,
"rewards/margins": 15.269411087036133,
"rewards/real": 5.259275913238525,
"step": 100
},
{
"epoch": 0.352,
"grad_norm": 1.0986971210025431,
"learning_rate": 4.904988123515439e-07,
"logits/generated": -2.725268602371216,
"logits/real": -2.57702374458313,
"logps/generated": -361.0770263671875,
"logps/real": -92.5576171875,
"loss": 0.0871,
"rewards/accuracies": 1.0,
"rewards/generated": -11.147048950195312,
"rewards/margins": 16.710220336914062,
"rewards/real": 5.563170433044434,
"step": 110
},
{
"epoch": 0.384,
"grad_norm": 1.335382395511731,
"learning_rate": 4.845605700712589e-07,
"logits/generated": -2.6858296394348145,
"logits/real": -2.493039608001709,
"logps/generated": -359.83551025390625,
"logps/real": -86.32958984375,
"loss": 0.0904,
"rewards/accuracies": 1.0,
"rewards/generated": -11.481039047241211,
"rewards/margins": 17.33412742614746,
"rewards/real": 5.853088855743408,
"step": 120
},
{
"epoch": 0.3968,
"eval_logits/generated": -2.7832465171813965,
"eval_logits/real": -2.51879620552063,
"eval_logps/generated": -361.52301025390625,
"eval_logps/real": -98.11322784423828,
"eval_loss": 0.10012635588645935,
"eval_rewards/accuracies": 1.0,
"eval_rewards/generated": -10.828282356262207,
"eval_rewards/margins": 14.685786247253418,
"eval_rewards/real": 3.85750150680542,
"eval_runtime": 58.1864,
"eval_samples_per_second": 3.437,
"eval_steps_per_second": 0.223,
"step": 124
}
],
"logging_steps": 10,
"max_steps": 936,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 31,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}