|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9874476987447699, |
|
"eval_steps": 500, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016736401673640166, |
|
"grad_norm": 53.25663135243593, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/chosen": -2.60155987739563, |
|
"logits/rejected": -2.560150146484375, |
|
"logps/chosen": -302.1136474609375, |
|
"logps/pi_response": -142.7164764404297, |
|
"logps/ref_response": -142.7164764404297, |
|
"logps/rejected": -372.4461364746094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 33.71802345048987, |
|
"learning_rate": 4.930057285201027e-07, |
|
"logits/chosen": -2.5734050273895264, |
|
"logits/rejected": -2.528841257095337, |
|
"logps/chosen": -264.94677734375, |
|
"logps/pi_response": -133.82980346679688, |
|
"logps/ref_response": -121.70560455322266, |
|
"logps/rejected": -426.4489440917969, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": -0.24433700740337372, |
|
"rewards/margins": 0.1003943383693695, |
|
"rewards/rejected": -0.34473133087158203, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 42.366900514115656, |
|
"learning_rate": 4.187457503795526e-07, |
|
"logits/chosen": -2.4807186126708984, |
|
"logits/rejected": -2.4149527549743652, |
|
"logps/chosen": -322.50579833984375, |
|
"logps/pi_response": -146.48080444335938, |
|
"logps/ref_response": -126.39298248291016, |
|
"logps/rejected": -567.7706298828125, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.8342545628547668, |
|
"rewards/margins": 0.9264798164367676, |
|
"rewards/rejected": -1.7607343196868896, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 37.21235124072302, |
|
"learning_rate": 2.8691164100062034e-07, |
|
"logits/chosen": -2.3930041790008545, |
|
"logits/rejected": -2.3236992359161377, |
|
"logps/chosen": -341.8916320800781, |
|
"logps/pi_response": -139.42262268066406, |
|
"logps/ref_response": -122.14532470703125, |
|
"logps/rejected": -547.8569946289062, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.8386001586914062, |
|
"rewards/margins": 0.8395956158638, |
|
"rewards/rejected": -1.6781957149505615, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 37.34482955607721, |
|
"learning_rate": 1.4248369943086995e-07, |
|
"logits/chosen": -2.4054012298583984, |
|
"logits/rejected": -2.3128952980041504, |
|
"logps/chosen": -299.69805908203125, |
|
"logps/pi_response": -133.9350128173828, |
|
"logps/ref_response": -125.8031234741211, |
|
"logps/rejected": -555.8270263671875, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.5645470023155212, |
|
"rewards/margins": 0.8838322758674622, |
|
"rewards/rejected": -1.4483792781829834, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 25.861200618216518, |
|
"learning_rate": 3.473909705816111e-08, |
|
"logits/chosen": -2.3677544593811035, |
|
"logits/rejected": -2.3059608936309814, |
|
"logps/chosen": -317.23077392578125, |
|
"logps/pi_response": -134.0728302001953, |
|
"logps/ref_response": -123.98958587646484, |
|
"logps/rejected": -542.5103759765625, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.656657874584198, |
|
"rewards/margins": 0.8108199238777161, |
|
"rewards/rejected": -1.467477798461914, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9874476987447699, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5438056961964752, |
|
"train_runtime": 2605.9239, |
|
"train_samples_per_second": 5.865, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|