|
{ |
|
"best_metric": 0.38227128982543945, |
|
"best_model_checkpoint": "model_result/02_02_SoLAR_10.7B_DPO/checkpoint-2757", |
|
"epoch": 2.999592003263974, |
|
"eval_steps": 919, |
|
"global_step": 2757, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9998640010879913, |
|
"grad_norm": 29.002643585205078, |
|
"learning_rate": 4.4449818621523576e-07, |
|
"logits/chosen": -1.361188530921936, |
|
"logits/rejected": -1.3760977983474731, |
|
"logps/chosen": -369.7716064453125, |
|
"logps/rejected": -341.9248352050781, |
|
"loss": 0.9991, |
|
"rewards/accuracies": 0.6764485836029053, |
|
"rewards/chosen": 20.894943237304688, |
|
"rewards/margins": 2.2651174068450928, |
|
"rewards/rejected": 18.629825592041016, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.9998640010879913, |
|
"eval_logits/chosen": -1.3181335926055908, |
|
"eval_logits/rejected": -1.346123456954956, |
|
"eval_logps/chosen": -527.0983276367188, |
|
"eval_logps/rejected": -507.80621337890625, |
|
"eval_loss": 0.49919795989990234, |
|
"eval_rewards/accuracies": 0.8106383085250854, |
|
"eval_rewards/chosen": 5.702141761779785, |
|
"eval_rewards/margins": 3.5380544662475586, |
|
"eval_rewards/rejected": 2.1640872955322266, |
|
"eval_runtime": 456.9799, |
|
"eval_samples_per_second": 2.053, |
|
"eval_steps_per_second": 0.514, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.9997280021759827, |
|
"grad_norm": 29.223295211791992, |
|
"learning_rate": 3.3337363966142684e-07, |
|
"logits/chosen": -1.3160151243209839, |
|
"logits/rejected": -1.3324289321899414, |
|
"logps/chosen": -529.5614624023438, |
|
"logps/rejected": -521.8484497070312, |
|
"loss": 0.3824, |
|
"rewards/accuracies": 0.8457222580909729, |
|
"rewards/chosen": 4.916207790374756, |
|
"rewards/margins": 4.2801947593688965, |
|
"rewards/rejected": 0.6360131502151489, |
|
"step": 1838 |
|
}, |
|
{ |
|
"epoch": 1.9997280021759827, |
|
"eval_logits/chosen": -1.3160426616668701, |
|
"eval_logits/rejected": -1.3441028594970703, |
|
"eval_logps/chosen": -559.829345703125, |
|
"eval_logps/rejected": -548.035400390625, |
|
"eval_loss": 0.39502739906311035, |
|
"eval_rewards/accuracies": 0.8446808457374573, |
|
"eval_rewards/chosen": 2.4290404319763184, |
|
"eval_rewards/margins": 4.287874221801758, |
|
"eval_rewards/rejected": -1.858833909034729, |
|
"eval_runtime": 456.8696, |
|
"eval_samples_per_second": 2.053, |
|
"eval_steps_per_second": 0.514, |
|
"step": 1838 |
|
}, |
|
{ |
|
"epoch": 2.999592003263974, |
|
"grad_norm": 18.053089141845703, |
|
"learning_rate": 2.2224909310761788e-07, |
|
"logits/chosen": -1.3009860515594482, |
|
"logits/rejected": -1.3179205656051636, |
|
"logps/chosen": -557.4498291015625, |
|
"logps/rejected": -556.6650390625, |
|
"loss": 0.2672, |
|
"rewards/accuracies": 0.8876156210899353, |
|
"rewards/chosen": 2.127683401107788, |
|
"rewards/margins": 4.969925403594971, |
|
"rewards/rejected": -2.8422415256500244, |
|
"step": 2757 |
|
}, |
|
{ |
|
"epoch": 2.999592003263974, |
|
"eval_logits/chosen": -1.2920633554458618, |
|
"eval_logits/rejected": -1.320629596710205, |
|
"eval_logps/chosen": -571.9027709960938, |
|
"eval_logps/rejected": -563.9360961914062, |
|
"eval_loss": 0.38227128982543945, |
|
"eval_rewards/accuracies": 0.8542553186416626, |
|
"eval_rewards/chosen": 1.2216957807540894, |
|
"eval_rewards/margins": 4.6706061363220215, |
|
"eval_rewards/rejected": -3.4489102363586426, |
|
"eval_runtime": 456.9375, |
|
"eval_samples_per_second": 2.053, |
|
"eval_steps_per_second": 0.514, |
|
"step": 2757 |
|
} |
|
], |
|
"logging_steps": 919, |
|
"max_steps": 4595, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 919, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|