|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3968, |
|
"eval_steps": 31, |
|
"global_step": 124, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0032, |
|
"grad_norm": 309.04591823661724, |
|
"learning_rate": 5.3191489361702125e-09, |
|
"logits/generated": -3.1874351501464844, |
|
"logits/real": -2.811344623565674, |
|
"logps/generated": -277.39678955078125, |
|
"logps/real": -164.29153442382812, |
|
"loss": 0.8248, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 273.888694984639, |
|
"learning_rate": 5.3191489361702123e-08, |
|
"logits/generated": -2.979994058609009, |
|
"logits/real": -2.536571979522705, |
|
"logps/generated": -242.26495361328125, |
|
"logps/real": -126.36863708496094, |
|
"loss": 0.7579, |
|
"rewards/accuracies": 0.8055555820465088, |
|
"rewards/generated": -0.08589766174554825, |
|
"rewards/margins": 0.1557125300168991, |
|
"rewards/real": 0.06981485337018967, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 6.278552838520857, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/generated": -3.1302971839904785, |
|
"logits/real": -2.4443600177764893, |
|
"logps/generated": -277.353759765625, |
|
"logps/real": -123.6572494506836, |
|
"loss": 0.2741, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.92562997341156, |
|
"rewards/margins": 3.013349771499634, |
|
"rewards/real": 1.0877193212509155, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 2.184986357146384, |
|
"learning_rate": 1.5957446808510638e-07, |
|
"logits/generated": -2.7463412284851074, |
|
"logits/real": -2.1607251167297363, |
|
"logps/generated": -311.76275634765625, |
|
"logps/real": -105.89111328125, |
|
"loss": 0.1066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.328858375549316, |
|
"rewards/margins": 8.681567192077637, |
|
"rewards/real": 3.3527092933654785, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0992, |
|
"eval_logits/generated": -2.7707955837249756, |
|
"eval_logits/real": -2.1341326236724854, |
|
"eval_logps/generated": -309.7686767578125, |
|
"eval_logps/real": -99.30474090576172, |
|
"eval_loss": 0.10212492197751999, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -5.6528496742248535, |
|
"eval_rewards/margins": 9.39120101928711, |
|
"eval_rewards/real": 3.7383503913879395, |
|
"eval_runtime": 52.8853, |
|
"eval_samples_per_second": 3.782, |
|
"eval_steps_per_second": 0.246, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 1.4683533798363122, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/generated": -2.6473488807678223, |
|
"logits/real": -2.092941999435425, |
|
"logps/generated": -328.85003662109375, |
|
"logps/real": -108.34922790527344, |
|
"loss": 0.108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.4022650718688965, |
|
"rewards/margins": 11.075445175170898, |
|
"rewards/real": 3.6731820106506348, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.3833819249775736, |
|
"learning_rate": 2.659574468085106e-07, |
|
"logits/generated": -2.673710346221924, |
|
"logits/real": -1.7863633632659912, |
|
"logps/generated": -341.03179931640625, |
|
"logps/real": -99.21113586425781, |
|
"loss": 0.0938, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.64827823638916, |
|
"rewards/margins": 12.339118003845215, |
|
"rewards/real": 3.6908397674560547, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 1.4745541409589713, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/generated": -2.457066297531128, |
|
"logits/real": -1.9866771697998047, |
|
"logps/generated": -337.7644958496094, |
|
"logps/real": -100.87881469726562, |
|
"loss": 0.0953, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.72453784942627, |
|
"rewards/margins": 13.710101127624512, |
|
"rewards/real": 3.9855639934539795, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1984, |
|
"eval_logits/generated": -2.7848963737487793, |
|
"eval_logits/real": -2.406802177429199, |
|
"eval_logps/generated": -329.84674072265625, |
|
"eval_logps/real": -96.66284942626953, |
|
"eval_loss": 0.09936456382274628, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -7.660656452178955, |
|
"eval_rewards/margins": 11.66319751739502, |
|
"eval_rewards/real": 4.002540111541748, |
|
"eval_runtime": 57.5754, |
|
"eval_samples_per_second": 3.474, |
|
"eval_steps_per_second": 0.226, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 1.323646793674921, |
|
"learning_rate": 3.7234042553191484e-07, |
|
"logits/generated": -2.6429388523101807, |
|
"logits/real": -2.2720017433166504, |
|
"logps/generated": -340.06341552734375, |
|
"logps/real": -93.11213684082031, |
|
"loss": 0.099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.733041763305664, |
|
"rewards/margins": 12.913464546203613, |
|
"rewards/real": 4.180423259735107, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 1.5638794899127106, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/generated": -2.6645286083221436, |
|
"logits/real": -2.423600435256958, |
|
"logps/generated": -337.8456726074219, |
|
"logps/real": -98.58467102050781, |
|
"loss": 0.0932, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.53776741027832, |
|
"rewards/margins": 13.911605834960938, |
|
"rewards/real": 4.373837947845459, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 1.3991564683587894, |
|
"learning_rate": 4.787234042553192e-07, |
|
"logits/generated": -2.6595396995544434, |
|
"logits/real": -2.3267276287078857, |
|
"logps/generated": -356.4102478027344, |
|
"logps/real": -105.5425033569336, |
|
"loss": 0.0963, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.849004745483398, |
|
"rewards/margins": 14.890844345092773, |
|
"rewards/real": 5.041840076446533, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2976, |
|
"eval_logits/generated": -2.7958528995513916, |
|
"eval_logits/real": -2.4889986515045166, |
|
"eval_logps/generated": -360.5955505371094, |
|
"eval_logps/real": -97.25410461425781, |
|
"eval_loss": 0.09956898540258408, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -10.735539436340332, |
|
"eval_rewards/margins": 14.678956985473633, |
|
"eval_rewards/real": 3.943415403366089, |
|
"eval_runtime": 58.0625, |
|
"eval_samples_per_second": 3.445, |
|
"eval_steps_per_second": 0.224, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.2445906541027463, |
|
"learning_rate": 4.96437054631829e-07, |
|
"logits/generated": -2.7101943492889404, |
|
"logits/real": -2.502084493637085, |
|
"logps/generated": -350.31158447265625, |
|
"logps/real": -102.1003646850586, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.010136604309082, |
|
"rewards/margins": 15.269411087036133, |
|
"rewards/real": 5.259275913238525, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 1.0986971210025431, |
|
"learning_rate": 4.904988123515439e-07, |
|
"logits/generated": -2.725268602371216, |
|
"logits/real": -2.57702374458313, |
|
"logps/generated": -361.0770263671875, |
|
"logps/real": -92.5576171875, |
|
"loss": 0.0871, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.147048950195312, |
|
"rewards/margins": 16.710220336914062, |
|
"rewards/real": 5.563170433044434, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 1.335382395511731, |
|
"learning_rate": 4.845605700712589e-07, |
|
"logits/generated": -2.6858296394348145, |
|
"logits/real": -2.493039608001709, |
|
"logps/generated": -359.83551025390625, |
|
"logps/real": -86.32958984375, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.481039047241211, |
|
"rewards/margins": 17.33412742614746, |
|
"rewards/real": 5.853088855743408, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3968, |
|
"eval_logits/generated": -2.7832465171813965, |
|
"eval_logits/real": -2.51879620552063, |
|
"eval_logps/generated": -361.52301025390625, |
|
"eval_logps/real": -98.11322784423828, |
|
"eval_loss": 0.10012635588645935, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/generated": -10.828282356262207, |
|
"eval_rewards/margins": 14.685786247253418, |
|
"eval_rewards/real": 3.85750150680542, |
|
"eval_runtime": 58.1864, |
|
"eval_samples_per_second": 3.437, |
|
"eval_steps_per_second": 0.223, |
|
"step": 124 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 936, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 31, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|