{ "best_metric": 0.03174639865756035, "best_model_checkpoint": "./mixstral/31-03-24-Weni-WeniGPT-QA-Zephyr-7B-5.0.2-KTO_WeniGPT Experiment using KTO trainer with no collator, Mixstral model and amnesia system prompt.-2_max_steps-786_batch_32_2024-03-31_ppid_1264/checkpoint-200", "epoch": 1.5194681861348527, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "grad_norm": 2.4299395084381104, "learning_rate": 0.00015000000000000001, "loss": 0.4387, "step": 20, "train/kl": 0.0385909266769886, "train/logps/chosen": -159.8199005126953, "train/logps/rejected": -160.9553680419922, "train/rewards/chosen": 0.08566487580537796, "train/rewards/margins": 0.623574860394001, "train/rewards/rejected": -0.537909984588623 }, { "epoch": 0.3, "grad_norm": 0.6239124536514282, "learning_rate": 0.0001963254593175853, "loss": 0.1567, "step": 40, "train/kl": 0.3582279086112976, "train/logps/chosen": -123.32263946533203, "train/logps/rejected": -219.208251953125, "train/rewards/chosen": 1.9985122680664062, "train/rewards/margins": 7.9115424156188965, "train/rewards/rejected": -5.91303014755249 }, { "epoch": 0.38, "eval/kl": 0.0, "eval/logps/chosen": -146.17608642578125, "eval/logps/rejected": -271.80560302734375, "eval/rewards/chosen": 3.3853886127471924, "eval/rewards/margins": 15.140896081924438, "eval/rewards/rejected": -11.755507469177246, "eval_loss": 0.057219523936510086, "eval_runtime": 1031.0752, "eval_samples_per_second": 0.467, "eval_steps_per_second": 0.117, "step": 50 }, { "epoch": 0.46, "grad_norm": 0.16106899082660675, "learning_rate": 0.00019107611548556432, "loss": 0.0596, "step": 60, "train/kl": 0.1449735164642334, "train/logps/chosen": -131.60433959960938, "train/logps/rejected": -292.1778869628906, "train/rewards/chosen": 3.4192304611206055, "train/rewards/margins": 16.215981483459473, "train/rewards/rejected": -12.796751022338867 }, { "epoch": 0.61, "grad_norm": 0.6279284358024597, "learning_rate": 0.00018582677165354333, "loss": 0.0528, "step": 80, "train/kl": 0.047291625291109085, "train/logps/chosen": -116.19053649902344, "train/logps/rejected": -327.73248291015625, "train/rewards/chosen": 3.602910041809082, "train/rewards/margins": 21.11134433746338, "train/rewards/rejected": -17.508434295654297 }, { "epoch": 0.76, "grad_norm": 1.3628041744232178, "learning_rate": 0.0001805774278215223, "loss": 0.0481, "step": 100, "train/kl": 0.0, "train/logps/chosen": -127.67686462402344, "train/logps/rejected": -339.4940185546875, "train/rewards/chosen": 3.723008155822754, "train/rewards/margins": 21.06360149383545, "train/rewards/rejected": -17.340593338012695 }, { "epoch": 0.76, "eval/kl": 0.0, "eval/logps/chosen": -140.9491424560547, "eval/logps/rejected": -331.2147216796875, "eval/rewards/chosen": 3.908082962036133, "eval/rewards/margins": 21.604503631591797, "eval/rewards/rejected": -17.696420669555664, "eval_loss": 0.04277317598462105, "eval_runtime": 1009.2383, "eval_samples_per_second": 0.478, "eval_steps_per_second": 0.12, "step": 100 }, { "epoch": 0.91, "grad_norm": 0.2859511077404022, "learning_rate": 0.00017532808398950132, "loss": 0.0438, "step": 120, "train/kl": 0.0, "train/logps/chosen": -107.13507080078125, "train/logps/rejected": -362.26336669921875, "train/rewards/chosen": 3.8003525733947754, "train/rewards/margins": 23.768267154693604, "train/rewards/rejected": -19.967914581298828 }, { "epoch": 1.06, "grad_norm": 0.27395951747894287, "learning_rate": 0.00017007874015748033, "loss": 0.0391, "step": 140, "train/kl": 0.0, "train/logps/chosen": -116.65599822998047, "train/logps/rejected": -404.44793701171875, "train/rewards/chosen": 3.8443548679351807, "train/rewards/margins": 28.49311852455139, "train/rewards/rejected": -24.64876365661621 }, { "epoch": 1.14, "eval/kl": 0.0, "eval/logps/chosen": -137.66822814941406, "eval/logps/rejected": -436.40203857421875, "eval/rewards/chosen": 4.236174583435059, "eval/rewards/margins": 32.45132541656494, "eval/rewards/rejected": -28.215150833129883, "eval_loss": 0.03409990668296814, "eval_runtime": 1008.6969, "eval_samples_per_second": 0.478, "eval_steps_per_second": 0.12, "step": 150 }, { "epoch": 1.22, "grad_norm": 0.5016167163848877, "learning_rate": 0.00016482939632545934, "loss": 0.0283, "step": 160, "train/kl": 0.11301638185977936, "train/logps/chosen": -113.57328796386719, "train/logps/rejected": -447.1547546386719, "train/rewards/chosen": 4.402313709259033, "train/rewards/margins": 33.74688768386841, "train/rewards/rejected": -29.344573974609375 }, { "epoch": 1.37, "grad_norm": 0.051358386874198914, "learning_rate": 0.00015958005249343832, "loss": 0.0242, "step": 180, "train/kl": 0.05546154826879501, "train/logps/chosen": -101.12237548828125, "train/logps/rejected": -464.6999206542969, "train/rewards/chosen": 4.362555027008057, "train/rewards/margins": 34.47586107254028, "train/rewards/rejected": -30.113306045532227 }, { "epoch": 1.52, "grad_norm": 0.044250741600990295, "learning_rate": 0.00015433070866141733, "loss": 0.0209, "step": 200, "train/kl": 0.17102402448654175, "train/logps/chosen": -117.83503723144531, "train/logps/rejected": -454.4959411621094, "train/rewards/chosen": 4.654594898223877, "train/rewards/margins": 34.24535036087036, "train/rewards/rejected": -29.590755462646484 }, { "epoch": 1.52, "eval/kl": 0.0, "eval/logps/chosen": -136.6546630859375, "eval/logps/rejected": -475.3577575683594, "eval/rewards/chosen": 4.337530612945557, "eval/rewards/margins": 36.44825220108032, "eval/rewards/rejected": -32.110721588134766, "eval_loss": 0.03174639865756035, "eval_runtime": 1008.2427, "eval_samples_per_second": 0.478, "eval_steps_per_second": 0.12, "step": 200 } ], "logging_steps": 20, "max_steps": 786, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }