|
{
  "best_metric": 0.03174639865756035,
  "best_model_checkpoint": "./mixstral/31-03-24-Weni-WeniGPT-QA-Zephyr-7B-5.0.2-KTO_WeniGPT Experiment using KTO trainer with no collator, Mixstral model and amnesia system prompt.-2_max_steps-786_batch_32_2024-03-31_ppid_1264/checkpoint-200",
  "epoch": 1.5194681861348527,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "grad_norm": 2.4299395084381104,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.4387,
      "step": 20,
      "train/kl": 0.0385909266769886,
      "train/logps/chosen": -159.8199005126953,
      "train/logps/rejected": -160.9553680419922,
      "train/rewards/chosen": 0.08566487580537796,
      "train/rewards/margins": 0.623574860394001,
      "train/rewards/rejected": -0.537909984588623
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6239124536514282,
      "learning_rate": 0.0001963254593175853,
      "loss": 0.1567,
      "step": 40,
      "train/kl": 0.3582279086112976,
      "train/logps/chosen": -123.32263946533203,
      "train/logps/rejected": -219.208251953125,
      "train/rewards/chosen": 1.9985122680664062,
      "train/rewards/margins": 7.9115424156188965,
      "train/rewards/rejected": -5.91303014755249
    },
    {
      "epoch": 0.38,
      "eval/kl": 0.0,
      "eval/logps/chosen": -146.17608642578125,
      "eval/logps/rejected": -271.80560302734375,
      "eval/rewards/chosen": 3.3853886127471924,
      "eval/rewards/margins": 15.140896081924438,
      "eval/rewards/rejected": -11.755507469177246,
      "eval_loss": 0.057219523936510086,
      "eval_runtime": 1031.0752,
      "eval_samples_per_second": 0.467,
      "eval_steps_per_second": 0.117,
      "step": 50
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.16106899082660675,
      "learning_rate": 0.00019107611548556432,
      "loss": 0.0596,
      "step": 60,
      "train/kl": 0.1449735164642334,
      "train/logps/chosen": -131.60433959960938,
      "train/logps/rejected": -292.1778869628906,
      "train/rewards/chosen": 3.4192304611206055,
      "train/rewards/margins": 16.215981483459473,
      "train/rewards/rejected": -12.796751022338867
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.6279284358024597,
      "learning_rate": 0.00018582677165354333,
      "loss": 0.0528,
      "step": 80,
      "train/kl": 0.047291625291109085,
      "train/logps/chosen": -116.19053649902344,
      "train/logps/rejected": -327.73248291015625,
      "train/rewards/chosen": 3.602910041809082,
      "train/rewards/margins": 21.11134433746338,
      "train/rewards/rejected": -17.508434295654297
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3628041744232178,
      "learning_rate": 0.0001805774278215223,
      "loss": 0.0481,
      "step": 100,
      "train/kl": 0.0,
      "train/logps/chosen": -127.67686462402344,
      "train/logps/rejected": -339.4940185546875,
      "train/rewards/chosen": 3.723008155822754,
      "train/rewards/margins": 21.06360149383545,
      "train/rewards/rejected": -17.340593338012695
    },
    {
      "epoch": 0.76,
      "eval/kl": 0.0,
      "eval/logps/chosen": -140.9491424560547,
      "eval/logps/rejected": -331.2147216796875,
      "eval/rewards/chosen": 3.908082962036133,
      "eval/rewards/margins": 21.604503631591797,
      "eval/rewards/rejected": -17.696420669555664,
      "eval_loss": 0.04277317598462105,
      "eval_runtime": 1009.2383,
      "eval_samples_per_second": 0.478,
      "eval_steps_per_second": 0.12,
      "step": 100
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2859511077404022,
      "learning_rate": 0.00017532808398950132,
      "loss": 0.0438,
      "step": 120,
      "train/kl": 0.0,
      "train/logps/chosen": -107.13507080078125,
      "train/logps/rejected": -362.26336669921875,
      "train/rewards/chosen": 3.8003525733947754,
      "train/rewards/margins": 23.768267154693604,
      "train/rewards/rejected": -19.967914581298828
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.27395951747894287,
      "learning_rate": 0.00017007874015748033,
      "loss": 0.0391,
      "step": 140,
      "train/kl": 0.0,
      "train/logps/chosen": -116.65599822998047,
      "train/logps/rejected": -404.44793701171875,
      "train/rewards/chosen": 3.8443548679351807,
      "train/rewards/margins": 28.49311852455139,
      "train/rewards/rejected": -24.64876365661621
    },
    {
      "epoch": 1.14,
      "eval/kl": 0.0,
      "eval/logps/chosen": -137.66822814941406,
      "eval/logps/rejected": -436.40203857421875,
      "eval/rewards/chosen": 4.236174583435059,
      "eval/rewards/margins": 32.45132541656494,
      "eval/rewards/rejected": -28.215150833129883,
      "eval_loss": 0.03409990668296814,
      "eval_runtime": 1008.6969,
      "eval_samples_per_second": 0.478,
      "eval_steps_per_second": 0.12,
      "step": 150
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.5016167163848877,
      "learning_rate": 0.00016482939632545934,
      "loss": 0.0283,
      "step": 160,
      "train/kl": 0.11301638185977936,
      "train/logps/chosen": -113.57328796386719,
      "train/logps/rejected": -447.1547546386719,
      "train/rewards/chosen": 4.402313709259033,
      "train/rewards/margins": 33.74688768386841,
      "train/rewards/rejected": -29.344573974609375
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.051358386874198914,
      "learning_rate": 0.00015958005249343832,
      "loss": 0.0242,
      "step": 180,
      "train/kl": 0.05546154826879501,
      "train/logps/chosen": -101.12237548828125,
      "train/logps/rejected": -464.6999206542969,
      "train/rewards/chosen": 4.362555027008057,
      "train/rewards/margins": 34.47586107254028,
      "train/rewards/rejected": -30.113306045532227
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.044250741600990295,
      "learning_rate": 0.00015433070866141733,
      "loss": 0.0209,
      "step": 200,
      "train/kl": 0.17102402448654175,
      "train/logps/chosen": -117.83503723144531,
      "train/logps/rejected": -454.4959411621094,
      "train/rewards/chosen": 4.654594898223877,
      "train/rewards/margins": 34.24535036087036,
      "train/rewards/rejected": -29.590755462646484
    },
    {
      "epoch": 1.52,
      "eval/kl": 0.0,
      "eval/logps/chosen": -136.6546630859375,
      "eval/logps/rejected": -475.3577575683594,
      "eval/rewards/chosen": 4.337530612945557,
      "eval/rewards/margins": 36.44825220108032,
      "eval/rewards/rejected": -32.110721588134766,
      "eval_loss": 0.03174639865756035,
      "eval_runtime": 1008.2427,
      "eval_samples_per_second": 0.478,
      "eval_steps_per_second": 0.12,
      "step": 200
    }
  ],
  "logging_steps": 20,
  "max_steps": 786,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}