|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 63, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.015873015873015872, |
|
"grad_norm": 8.639281115996736, |
|
"learning_rate": 2.6315789473684208e-08, |
|
"logits/chosen": -1.015625, |
|
"logits/rejected": -1.390625, |
|
"logps/chosen": -45.5, |
|
"logps/rejected": -80.5, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.15873015873015872, |
|
"grad_norm": 7.679101104209251, |
|
"learning_rate": 2.631578947368421e-07, |
|
"logits/chosen": -1.203125, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -55.25, |
|
"logps/rejected": -57.5, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.0694444477558136, |
|
"rewards/chosen": -1.6987323760986328e-06, |
|
"rewards/margins": -0.0013885498046875, |
|
"rewards/rejected": 0.00139617919921875, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31746031746031744, |
|
"grad_norm": 6.832273820892766, |
|
"learning_rate": 4.970588235294118e-07, |
|
"logits/chosen": -1.09375, |
|
"logits/rejected": -1.21875, |
|
"logps/chosen": -53.5, |
|
"logps/rejected": -56.75, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.1875, |
|
"rewards/chosen": -0.01220703125, |
|
"rewards/margins": 0.0031280517578125, |
|
"rewards/rejected": -0.015380859375, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 7.271916625407268, |
|
"learning_rate": 4.676470588235294e-07, |
|
"logits/chosen": -1.1640625, |
|
"logits/rejected": -1.09375, |
|
"logps/chosen": -60.0, |
|
"logps/rejected": -56.75, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.04931640625, |
|
"rewards/margins": 0.033447265625, |
|
"rewards/rejected": -0.0830078125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6349206349206349, |
|
"grad_norm": 9.143992879642836, |
|
"learning_rate": 4.38235294117647e-07, |
|
"logits/chosen": -1.3203125, |
|
"logits/rejected": -1.2578125, |
|
"logps/chosen": -63.25, |
|
"logps/rejected": -56.75, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.01531982421875, |
|
"rewards/margins": 0.08056640625, |
|
"rewards/rejected": -0.095703125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"grad_norm": 8.656910281802439, |
|
"learning_rate": 4.0882352941176465e-07, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.2109375, |
|
"logps/chosen": -64.5, |
|
"logps/rejected": -55.25, |
|
"loss": 0.6601, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": 0.07958984375, |
|
"rewards/margins": 0.1142578125, |
|
"rewards/rejected": -0.03466796875, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 7.24400853291459, |
|
"learning_rate": 3.7941176470588235e-07, |
|
"logits/chosen": -1.25, |
|
"logits/rejected": -1.1953125, |
|
"logps/chosen": -62.0, |
|
"logps/rejected": -56.0, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.099609375, |
|
"rewards/margins": 0.1171875, |
|
"rewards/rejected": -0.01708984375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -1.25, |
|
"eval_logits/rejected": -1.171875, |
|
"eval_logps/chosen": -65.0, |
|
"eval_logps/rejected": -57.25, |
|
"eval_loss": 0.6325781345367432, |
|
"eval_rewards/accuracies": 0.4107142984867096, |
|
"eval_rewards/chosen": 0.06103515625, |
|
"eval_rewards/margins": 0.1689453125, |
|
"eval_rewards/rejected": -0.10791015625, |
|
"eval_runtime": 12.383, |
|
"eval_samples_per_second": 16.151, |
|
"eval_steps_per_second": 0.565, |
|
"step": 63 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 189, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|