SoLAR_TaskInstruct_LoRA_DPO / trainer_state.json
Enkeeper's picture
Upload 12 files
56c16ca verified
{
"best_metric": 0.38227128982543945,
"best_model_checkpoint": "model_result/02_02_SoLAR_10.7B_DPO/checkpoint-2757",
"epoch": 2.999592003263974,
"eval_steps": 919,
"global_step": 2757,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9998640010879913,
"grad_norm": 29.002643585205078,
"learning_rate": 4.4449818621523576e-07,
"logits/chosen": -1.361188530921936,
"logits/rejected": -1.3760977983474731,
"logps/chosen": -369.7716064453125,
"logps/rejected": -341.9248352050781,
"loss": 0.9991,
"rewards/accuracies": 0.6764485836029053,
"rewards/chosen": 20.894943237304688,
"rewards/margins": 2.2651174068450928,
"rewards/rejected": 18.629825592041016,
"step": 919
},
{
"epoch": 0.9998640010879913,
"eval_logits/chosen": -1.3181335926055908,
"eval_logits/rejected": -1.346123456954956,
"eval_logps/chosen": -527.0983276367188,
"eval_logps/rejected": -507.80621337890625,
"eval_loss": 0.49919795989990234,
"eval_rewards/accuracies": 0.8106383085250854,
"eval_rewards/chosen": 5.702141761779785,
"eval_rewards/margins": 3.5380544662475586,
"eval_rewards/rejected": 2.1640872955322266,
"eval_runtime": 456.9799,
"eval_samples_per_second": 2.053,
"eval_steps_per_second": 0.514,
"step": 919
},
{
"epoch": 1.9997280021759827,
"grad_norm": 29.223295211791992,
"learning_rate": 3.3337363966142684e-07,
"logits/chosen": -1.3160151243209839,
"logits/rejected": -1.3324289321899414,
"logps/chosen": -529.5614624023438,
"logps/rejected": -521.8484497070312,
"loss": 0.3824,
"rewards/accuracies": 0.8457222580909729,
"rewards/chosen": 4.916207790374756,
"rewards/margins": 4.2801947593688965,
"rewards/rejected": 0.6360131502151489,
"step": 1838
},
{
"epoch": 1.9997280021759827,
"eval_logits/chosen": -1.3160426616668701,
"eval_logits/rejected": -1.3441028594970703,
"eval_logps/chosen": -559.829345703125,
"eval_logps/rejected": -548.035400390625,
"eval_loss": 0.39502739906311035,
"eval_rewards/accuracies": 0.8446808457374573,
"eval_rewards/chosen": 2.4290404319763184,
"eval_rewards/margins": 4.287874221801758,
"eval_rewards/rejected": -1.858833909034729,
"eval_runtime": 456.8696,
"eval_samples_per_second": 2.053,
"eval_steps_per_second": 0.514,
"step": 1838
},
{
"epoch": 2.999592003263974,
"grad_norm": 18.053089141845703,
"learning_rate": 2.2224909310761788e-07,
"logits/chosen": -1.3009860515594482,
"logits/rejected": -1.3179205656051636,
"logps/chosen": -557.4498291015625,
"logps/rejected": -556.6650390625,
"loss": 0.2672,
"rewards/accuracies": 0.8876156210899353,
"rewards/chosen": 2.127683401107788,
"rewards/margins": 4.969925403594971,
"rewards/rejected": -2.8422415256500244,
"step": 2757
},
{
"epoch": 2.999592003263974,
"eval_logits/chosen": -1.2920633554458618,
"eval_logits/rejected": -1.320629596710205,
"eval_logps/chosen": -571.9027709960938,
"eval_logps/rejected": -563.9360961914062,
"eval_loss": 0.38227128982543945,
"eval_rewards/accuracies": 0.8542553186416626,
"eval_rewards/chosen": 1.2216957807540894,
"eval_rewards/margins": 4.6706061363220215,
"eval_rewards/rejected": -3.4489102363586426,
"eval_runtime": 456.9375,
"eval_samples_per_second": 2.053,
"eval_steps_per_second": 0.514,
"step": 2757
}
],
"logging_steps": 919,
"max_steps": 4595,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 919,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}