{
  "best_metric": 0.09428545832633972,
  "best_model_checkpoint": "saves/Llama-3.1-8B-Instruct/lora/saa-600/checkpoint-250",
  "epoch": 9.777777777777779,
  "eval_steps": 50,
  "global_step": 330,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 7.07548189163208,
      "learning_rate": 1.5151515151515152e-06,
      "logits/chosen": -0.4374169409275055,
      "logits/rejected": -0.5023793578147888,
      "logps/chosen": -1.741115927696228,
      "logps/rejected": -2.1606717109680176,
      "loss": 1.7946,
      "odds_ratio_loss": 15.69953727722168,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.17411158978939056,
      "rewards/margins": 0.04195558652281761,
      "rewards/rejected": -0.21606719493865967,
      "sft_loss": 0.22465327382087708,
      "step": 10
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 7.023080825805664,
      "learning_rate": 3.0303030303030305e-06,
      "logits/chosen": -0.42782774567604065,
      "logits/rejected": -0.48748907446861267,
      "logps/chosen": -1.7547874450683594,
      "logps/rejected": -2.1007962226867676,
      "loss": 1.8133,
      "odds_ratio_loss": 15.838772773742676,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.1754787415266037,
      "rewards/margins": 0.034600891172885895,
      "rewards/rejected": -0.210079625248909,
      "sft_loss": 0.22939057648181915,
      "step": 20
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 8.079118728637695,
      "learning_rate": 4.5454545454545455e-06,
      "logits/chosen": -0.4177670478820801,
      "logits/rejected": -0.49106597900390625,
      "logps/chosen": -1.6719223260879517,
      "logps/rejected": -2.094174861907959,
      "loss": 1.725,
      "odds_ratio_loss": 15.089022636413574,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.1671922504901886,
      "rewards/margins": 0.042225271463394165,
      "rewards/rejected": -0.20941750705242157,
      "sft_loss": 0.21611404418945312,
      "step": 30
    },
    {
      "epoch": 1.1851851851851851,
      "grad_norm": 7.101726531982422,
      "learning_rate": 4.993149937871306e-06,
      "logits/chosen": -0.42014995217323303,
      "logits/rejected": -0.4878144860267639,
      "logps/chosen": -1.4801180362701416,
      "logps/rejected": -1.8868948221206665,
      "loss": 1.5344,
      "odds_ratio_loss": 13.476564407348633,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.14801180362701416,
      "rewards/margins": 0.040677666664123535,
      "rewards/rejected": -0.1886894851922989,
      "sft_loss": 0.18675227463245392,
      "step": 40
    },
    {
      "epoch": 1.4814814814814814,
      "grad_norm": 4.9273481369018555,
      "learning_rate": 4.959688949822748e-06,
      "logits/chosen": -0.4227227568626404,
      "logits/rejected": -0.4957185685634613,
      "logps/chosen": -1.2785080671310425,
      "logps/rejected": -1.6517393589019775,
      "loss": 1.3352,
      "odds_ratio_loss": 11.81715202331543,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.12785081565380096,
      "rewards/margins": 0.03732311353087425,
      "rewards/rejected": -0.16517391800880432,
      "sft_loss": 0.15344038605690002,
      "step": 50
    },
    {
      "epoch": 1.4814814814814814,
      "eval_logits/chosen": -0.40017402172088623,
      "eval_logits/rejected": -0.4812173843383789,
      "eval_logps/chosen": -0.9889497756958008,
      "eval_logps/rejected": -1.5758014917373657,
      "eval_loss": 1.0316624641418457,
      "eval_odds_ratio_loss": 9.149198532104492,
      "eval_rewards/accuracies": 0.8333333134651184,
      "eval_rewards/chosen": -0.09889498353004456,
      "eval_rewards/margins": 0.058685168623924255,
      "eval_rewards/rejected": -0.1575801521539688,
      "eval_runtime": 2.3161,
      "eval_samples_per_second": 25.906,
      "eval_sft_loss": 0.11674254387617111,
      "eval_steps_per_second": 12.953,
      "step": 50
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 3.42924427986145,
      "learning_rate": 4.8987324340362445e-06,
      "logits/chosen": -0.4220319390296936,
      "logits/rejected": -0.4980909824371338,
      "logps/chosen": -0.89045250415802,
      "logps/rejected": -1.3505830764770508,
      "loss": 0.9359,
      "odds_ratio_loss": 8.349299430847168,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.08904524892568588,
      "rewards/margins": 0.04601306468248367,
      "rewards/rejected": -0.13505831360816956,
      "sft_loss": 0.10094638913869858,
      "step": 60
    },
    {
      "epoch": 2.074074074074074,
      "grad_norm": 3.1744749546051025,
      "learning_rate": 4.810961790316731e-06,
      "logits/chosen": -0.4295685291290283,
      "logits/rejected": -0.5065377950668335,
      "logps/chosen": -0.5825018882751465,
      "logps/rejected": -1.0753108263015747,
      "loss": 0.625,
      "odds_ratio_loss": 5.649188995361328,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.05825018882751465,
      "rewards/margins": 0.049280889332294464,
      "rewards/rejected": -0.10753107070922852,
      "sft_loss": 0.0600874125957489,
      "step": 70
    },
    {
      "epoch": 2.3703703703703702,
      "grad_norm": 1.9260555505752563,
      "learning_rate": 4.697358159051549e-06,
      "logits/chosen": -0.40925922989845276,
      "logits/rejected": -0.4894172251224518,
      "logps/chosen": -0.44294339418411255,
      "logps/rejected": -0.9772504568099976,
      "loss": 0.4878,
      "odds_ratio_loss": 4.406769275665283,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.0442943349480629,
      "rewards/margins": 0.053430717438459396,
      "rewards/rejected": -0.097725048661232,
      "sft_loss": 0.047148533165454865,
      "step": 80
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 2.225752115249634,
      "learning_rate": 4.559191453574582e-06,
      "logits/chosen": -0.3779674470424652,
      "logits/rejected": -0.4604215621948242,
      "logps/chosen": -0.28717148303985596,
      "logps/rejected": -0.7895299196243286,
      "loss": 0.332,
      "odds_ratio_loss": 3.0206964015960693,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.028717149049043655,
      "rewards/margins": 0.050235848873853683,
      "rewards/rejected": -0.07895299792289734,
      "sft_loss": 0.029953395947813988,
      "step": 90
    },
    {
      "epoch": 2.962962962962963,
      "grad_norm": 1.3990237712860107,
      "learning_rate": 4.398006164494358e-06,
      "logits/chosen": -0.4050057828426361,
      "logits/rejected": -0.4781204164028168,
      "logps/chosen": -0.19231440126895905,
      "logps/rejected": -0.5829997062683105,
      "loss": 0.2371,
      "odds_ratio_loss": 2.1702122688293457,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.019231440499424934,
      "rewards/margins": 0.03906853124499321,
      "rewards/rejected": -0.058299969881772995,
      "sft_loss": 0.02010512165725231,
      "step": 100
    },
    {
      "epoch": 2.962962962962963,
      "eval_logits/chosen": -0.38126423954963684,
      "eval_logits/rejected": -0.455107718706131,
      "eval_logps/chosen": -0.13484641909599304,
      "eval_logps/rejected": -0.6987236142158508,
      "eval_loss": 0.16552023589611053,
      "eval_odds_ratio_loss": 1.47817862033844,
      "eval_rewards/accuracies": 0.8833333253860474,
      "eval_rewards/chosen": -0.013484641909599304,
      "eval_rewards/margins": 0.05638771876692772,
      "eval_rewards/rejected": -0.06987235695123672,
      "eval_runtime": 2.3132,
      "eval_samples_per_second": 25.938,
      "eval_sft_loss": 0.01770237274467945,
      "eval_steps_per_second": 12.969,
      "step": 100
    },
    {
      "epoch": 3.259259259259259,
      "grad_norm": 1.6745034456253052,
      "learning_rate": 4.215604094671835e-06,
      "logits/chosen": -0.39228641986846924,
      "logits/rejected": -0.4650408625602722,
      "logps/chosen": -0.14995309710502625,
      "logps/rejected": -0.6513184309005737,
      "loss": 0.1933,
      "odds_ratio_loss": 1.7675580978393555,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.014995308592915535,
      "rewards/margins": 0.05013653635978699,
      "rewards/rejected": -0.06513184309005737,
      "sft_loss": 0.016546962782740593,
      "step": 110
    },
    {
      "epoch": 3.5555555555555554,
      "grad_norm": 2.232027053833008,
      "learning_rate": 4.014024217844167e-06,
      "logits/chosen": -0.3439103364944458,
      "logits/rejected": -0.41849011182785034,
      "logps/chosen": -0.1279471218585968,
      "logps/rejected": -0.5881286859512329,
      "loss": 0.1679,
      "odds_ratio_loss": 1.5120834112167358,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -0.012794713489711285,
      "rewards/margins": 0.04601815715432167,
      "rewards/rejected": -0.05881286785006523,
      "sft_loss": 0.01666136085987091,
      "step": 120
    },
    {
      "epoch": 3.851851851851852,
      "grad_norm": 1.1567457914352417,
      "learning_rate": 3.7955198860439892e-06,
      "logits/chosen": -0.4037134051322937,
      "logits/rejected": -0.4531572461128235,
      "logps/chosen": -0.10376612842082977,
      "logps/rejected": -0.5740376114845276,
      "loss": 0.1434,
      "odds_ratio_loss": 1.3169727325439453,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.010376612655818462,
      "rewards/margins": 0.04702714830636978,
      "rewards/rejected": -0.05740376561880112,
      "sft_loss": 0.011749515309929848,
      "step": 130
    },
    {
      "epoch": 4.148148148148148,
      "grad_norm": 0.8253294229507446,
      "learning_rate": 3.5625336406000752e-06,
      "logits/chosen": -0.41028180718421936,
      "logits/rejected": -0.46746310591697693,
      "logps/chosen": -0.09531185775995255,
      "logps/rejected": -0.5239280462265015,
      "loss": 0.1289,
      "odds_ratio_loss": 1.1706035137176514,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.00953118596225977,
      "rewards/margins": 0.04286161810159683,
      "rewards/rejected": -0.05239280313253403,
      "sft_loss": 0.011803574860095978,
      "step": 140
    },
    {
      "epoch": 4.444444444444445,
      "grad_norm": 0.9827601313591003,
      "learning_rate": 3.3176699082935546e-06,
      "logits/chosen": -0.3458485007286072,
      "logits/rejected": -0.4066559374332428,
      "logps/chosen": -0.10192994773387909,
      "logps/rejected": -0.5467253923416138,
      "loss": 0.1421,
      "odds_ratio_loss": 1.2931029796600342,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.010192994959652424,
      "rewards/margins": 0.04447954148054123,
      "rewards/rejected": -0.05467253923416138,
      "sft_loss": 0.01281714253127575,
      "step": 150
    },
    {
      "epoch": 4.444444444444445,
      "eval_logits/chosen": -0.34726279973983765,
      "eval_logits/rejected": -0.4106636047363281,
      "eval_logps/chosen": -0.0770278051495552,
      "eval_logps/rejected": -0.5773364901542664,
      "eval_loss": 0.10104309767484665,
      "eval_odds_ratio_loss": 0.8869253396987915,
      "eval_rewards/accuracies": 0.8833333253860474,
      "eval_rewards/chosen": -0.007702780421823263,
      "eval_rewards/margins": 0.05003087595105171,
      "eval_rewards/rejected": -0.05773365497589111,
      "eval_runtime": 2.316,
      "eval_samples_per_second": 25.907,
      "eval_sft_loss": 0.012350580655038357,
      "eval_steps_per_second": 12.953,
      "step": 150
    },
    {
      "epoch": 4.7407407407407405,
      "grad_norm": 1.6250287294387817,
      "learning_rate": 3.0636658878845116e-06,
      "logits/chosen": -0.38808631896972656,
      "logits/rejected": -0.45208558440208435,
      "logps/chosen": -0.10056424140930176,
      "logps/rejected": -0.5426880717277527,
      "loss": 0.1333,
      "odds_ratio_loss": 1.2175222635269165,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.01005642395466566,
      "rewards/margins": 0.04421238973736763,
      "rewards/rejected": -0.054268814623355865,
      "sft_loss": 0.011499151587486267,
      "step": 160
    },
    {
      "epoch": 5.037037037037037,
      "grad_norm": 1.2549740076065063,
      "learning_rate": 2.803360952452705e-06,
      "logits/chosen": -0.3857024013996124,
      "logits/rejected": -0.43612140417099,
      "logps/chosen": -0.08464725315570831,
      "logps/rejected": -0.4786381125450134,
      "loss": 0.1229,
      "odds_ratio_loss": 1.1305350065231323,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.008464725688099861,
      "rewards/margins": 0.03939909487962723,
      "rewards/rejected": -0.04786381870508194,
      "sft_loss": 0.00985820684581995,
      "step": 170
    },
    {
      "epoch": 5.333333333333333,
      "grad_norm": 2.602710485458374,
      "learning_rate": 2.53966490958702e-06,
      "logits/chosen": -0.32125982642173767,
      "logits/rejected": -0.3869190812110901,
      "logps/chosen": -0.0981438010931015,
      "logps/rejected": -0.6228185892105103,
      "loss": 0.1321,
      "odds_ratio_loss": 1.208888053894043,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.00981437973678112,
      "rewards/margins": 0.05246748402714729,
      "rewards/rejected": -0.062281858175992966,
      "sft_loss": 0.011162296868860722,
      "step": 180
    },
    {
      "epoch": 5.62962962962963,
      "grad_norm": 0.7487705945968628,
      "learning_rate": 2.275525474225771e-06,
      "logits/chosen": -0.38048022985458374,
      "logits/rejected": -0.45359840989112854,
      "logps/chosen": -0.08556422591209412,
      "logps/rejected": -0.568170428276062,
      "loss": 0.118,
      "odds_ratio_loss": 1.0685060024261475,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.008556422777473927,
      "rewards/margins": 0.04826062172651291,
      "rewards/rejected": -0.05681704729795456,
      "sft_loss": 0.011130120605230331,
      "step": 190
    },
    {
      "epoch": 5.925925925925926,
      "grad_norm": 1.8954200744628906,
      "learning_rate": 2.013895317751323e-06,
      "logits/chosen": -0.3612784445285797,
      "logits/rejected": -0.398723840713501,
      "logps/chosen": -0.09013709425926208,
      "logps/rejected": -0.47434768080711365,
      "loss": 0.1291,
      "odds_ratio_loss": 1.1944711208343506,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.009013709612190723,
      "rewards/margins": 0.03842105716466904,
      "rewards/rejected": -0.047434769570827484,
      "sft_loss": 0.00965641625225544,
      "step": 200
    },
    {
      "epoch": 5.925925925925926,
      "eval_logits/chosen": -0.3441879153251648,
      "eval_logits/rejected": -0.40659084916114807,
      "eval_logps/chosen": -0.07519559562206268,
      "eval_logps/rejected": -0.5936176776885986,
      "eval_loss": 0.09843841940164566,
      "eval_odds_ratio_loss": 0.8613345623016357,
      "eval_rewards/accuracies": 0.8833333253860474,
      "eval_rewards/chosen": -0.007519559469074011,
      "eval_rewards/margins": 0.05184221267700195,
      "eval_rewards/rejected": -0.0593617707490921,
      "eval_runtime": 2.3134,
      "eval_samples_per_second": 25.936,
      "eval_sft_loss": 0.012304977513849735,
      "eval_steps_per_second": 12.968,
      "step": 200
    },
    {
      "epoch": 6.222222222222222,
      "grad_norm": 2.2815189361572266,
      "learning_rate": 1.7576990616793139e-06,
      "logits/chosen": -0.3727927803993225,
      "logits/rejected": -0.4259300231933594,
      "logps/chosen": -0.06345033645629883,
      "logps/rejected": -0.5240000486373901,
      "loss": 0.0946,
      "odds_ratio_loss": 0.8643038868904114,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -0.006345034576952457,
      "rewards/margins": 0.04605497419834137,
      "rewards/rejected": -0.052400004118680954,
      "sft_loss": 0.008158734068274498,
      "step": 210
    },
    {
      "epoch": 6.518518518518518,
      "grad_norm": 1.3194066286087036,
      "learning_rate": 1.509800584902108e-06,
      "logits/chosen": -0.3670283854007721,
      "logits/rejected": -0.427605003118515,
      "logps/chosen": -0.09667733311653137,
      "logps/rejected": -0.5639557838439941,
      "loss": 0.1333,
      "odds_ratio_loss": 1.2246453762054443,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.009667733684182167,
      "rewards/margins": 0.04672784358263016,
      "rewards/rejected": -0.056395579129457474,
      "sft_loss": 0.010851002298295498,
      "step": 220
    },
    {
      "epoch": 6.814814814814815,
      "grad_norm": 1.5913020372390747,
      "learning_rate": 1.2729710099410802e-06,
      "logits/chosen": -0.3422110676765442,
      "logits/rejected": -0.41096681356430054,
      "logps/chosen": -0.07137643545866013,
      "logps/rejected": -0.5844155550003052,
      "loss": 0.0972,
      "odds_ratio_loss": 0.8859140276908875,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -0.007137644104659557,
      "rewards/margins": 0.05130390450358391,
      "rewards/rejected": -0.05844154953956604,
      "sft_loss": 0.008584940806031227,
      "step": 230
    },
    {
      "epoch": 7.111111111111111,
      "grad_norm": 1.441452980041504,
      "learning_rate": 1.049857726072005e-06,
      "logits/chosen": -0.37981483340263367,
      "logits/rejected": -0.42586684226989746,
      "logps/chosen": -0.09972624480724335,
      "logps/rejected": -0.5454004406929016,
      "loss": 0.1338,
      "odds_ratio_loss": 1.2288706302642822,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.009972624480724335,
      "rewards/margins": 0.044567424803972244,
      "rewards/rejected": -0.05454004928469658,
      "sft_loss": 0.010933582670986652,
      "step": 240
    },
    {
      "epoch": 7.407407407407407,
      "grad_norm": 2.5117592811584473,
      "learning_rate": 8.4295479559726e-07,
      "logits/chosen": -0.38271045684814453,
      "logits/rejected": -0.4315881133079529,
      "logps/chosen": -0.0871758908033371,
      "logps/rejected": -0.5703214406967163,
      "loss": 0.1246,
      "odds_ratio_loss": 1.1464191675186157,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.00871758908033371,
      "rewards/margins": 0.04831455647945404,
      "rewards/rejected": -0.05703214555978775,
      "sft_loss": 0.009948917664587498,
      "step": 250
    },
    {
      "epoch": 7.407407407407407,
      "eval_logits/chosen": -0.34323057532310486,
      "eval_logits/rejected": -0.4047623574733734,
      "eval_logps/chosen": -0.07215116173028946,
      "eval_logps/rejected": -0.6233159303665161,
      "eval_loss": 0.09428545832633972,
      "eval_odds_ratio_loss": 0.8242944478988647,
      "eval_rewards/accuracies": 0.8833333253860474,
      "eval_rewards/chosen": -0.0072151171043515205,
      "eval_rewards/margins": 0.05511648207902908,
      "eval_rewards/rejected": -0.06233159825205803,
      "eval_runtime": 2.3121,
      "eval_samples_per_second": 25.95,
      "eval_sft_loss": 0.01185599621385336,
      "eval_steps_per_second": 12.975,
      "step": 250
    },
    {
      "epoch": 7.703703703703704,
      "grad_norm": 1.5904881954193115,
      "learning_rate": 6.545750740770338e-07,
      "logits/chosen": -0.3598392605781555,
      "logits/rejected": -0.423635333776474,
      "logps/chosen": -0.06470540165901184,
      "logps/rejected": -0.5819328427314758,
      "loss": 0.0906,
      "odds_ratio_loss": 0.8195532560348511,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.006470539607107639,
      "rewards/margins": 0.05172274261713028,
      "rewards/rejected": -0.05819328501820564,
      "sft_loss": 0.008671595714986324,
      "step": 260
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.6165652871131897,
      "learning_rate": 4.868243561723535e-07,
      "logits/chosen": -0.34859612584114075,
      "logits/rejected": -0.4086515009403229,
      "logps/chosen": -0.09018560498952866,
      "logps/rejected": -0.587788462638855,
      "loss": 0.1201,
      "odds_ratio_loss": 1.098928689956665,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -0.009018560871481895,
      "rewards/margins": 0.04976029321551323,
      "rewards/rejected": -0.058778852224349976,
      "sft_loss": 0.010236375033855438,
      "step": 270
    },
    {
      "epoch": 8.296296296296296,
      "grad_norm": 1.252172589302063,
      "learning_rate": 3.4157783610952263e-07,
      "logits/chosen": -0.3684031367301941,
      "logits/rejected": -0.4260830283164978,
      "logps/chosen": -0.0856148824095726,
      "logps/rejected": -0.5833510756492615,
      "loss": 0.1153,
      "odds_ratio_loss": 1.06239914894104,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.008561487309634686,
      "rewards/margins": 0.049773626029491425,
      "rewards/rejected": -0.058335114270448685,
      "sft_loss": 0.009099993854761124,
      "step": 280
    },
    {
      "epoch": 8.592592592592592,
      "grad_norm": 1.9929240942001343,
      "learning_rate": 2.2045914590165252e-07,
      "logits/chosen": -0.4020005166530609,
      "logits/rejected": -0.46092405915260315,
      "logps/chosen": -0.07739080488681793,
      "logps/rejected": -0.62553870677948,
      "loss": 0.1022,
      "odds_ratio_loss": 0.9326642155647278,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.007739080581814051,
      "rewards/margins": 0.054814793169498444,
      "rewards/rejected": -0.06255386769771576,
      "sft_loss": 0.008896315470337868,
      "step": 290
    },
    {
      "epoch": 8.88888888888889,
      "grad_norm": 0.9893295764923096,
      "learning_rate": 1.2482220564763669e-07,
      "logits/chosen": -0.34586095809936523,
      "logits/rejected": -0.39634814858436584,
      "logps/chosen": -0.07016898691654205,
      "logps/rejected": -0.5465742349624634,
      "loss": 0.1045,
      "odds_ratio_loss": 0.9643081426620483,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -0.0070168995298445225,
      "rewards/margins": 0.04764052852988243,
      "rewards/rejected": -0.054657429456710815,
      "sft_loss": 0.008084597066044807,
      "step": 300
    },
    {
      "epoch": 8.88888888888889,
      "eval_logits/chosen": -0.34323617815971375,
      "eval_logits/rejected": -0.40456622838974,
      "eval_logps/chosen": -0.07242080569267273,
      "eval_logps/rejected": -0.6277292966842651,
      "eval_loss": 0.09481088072061539,
      "eval_odds_ratio_loss": 0.829154908657074,
      "eval_rewards/accuracies": 0.8833333253860474,
      "eval_rewards/chosen": -0.007242080755531788,
      "eval_rewards/margins": 0.0555308535695076,
      "eval_rewards/rejected": -0.06277292966842651,
      "eval_runtime": 2.3109,
      "eval_samples_per_second": 25.964,
      "eval_sft_loss": 0.011895372532308102,
      "eval_steps_per_second": 12.982,
      "step": 300
    },
    {
      "epoch": 9.185185185185185,
      "grad_norm": 1.7534313201904297,
      "learning_rate": 5.573608879422876e-08,
      "logits/chosen": -0.35441476106643677,
      "logits/rejected": -0.4039112627506256,
      "logps/chosen": -0.08877753466367722,
      "logps/rejected": -0.5493366718292236,
      "loss": 0.1254,
      "odds_ratio_loss": 1.14632248878479,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.008877754211425781,
      "rewards/margins": 0.046055909246206284,
      "rewards/rejected": -0.05493366718292236,
      "sft_loss": 0.01074306946247816,
      "step": 310
    },
    {
      "epoch": 9.481481481481481,
      "grad_norm": 1.6547372341156006,
      "learning_rate": 1.3973071544233219e-08,
      "logits/chosen": -0.37551018595695496,
      "logits/rejected": -0.4359508454799652,
      "logps/chosen": -0.07044418156147003,
      "logps/rejected": -0.5796228647232056,
      "loss": 0.098,
      "odds_ratio_loss": 0.9046875834465027,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -0.007044418249279261,
      "rewards/margins": 0.05091787129640579,
      "rewards/rejected": -0.05796227976679802,
      "sft_loss": 0.007553444243967533,
      "step": 320
    },
    {
      "epoch": 9.777777777777779,
      "grad_norm": 1.2167924642562866,
      "learning_rate": 0.0,
      "logits/chosen": -0.36357811093330383,
      "logits/rejected": -0.4220617711544037,
      "logps/chosen": -0.0854811817407608,
      "logps/rejected": -0.5617056488990784,
      "loss": 0.1206,
      "odds_ratio_loss": 1.0994065999984741,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.008548117242753506,
      "rewards/margins": 0.047622449696063995,
      "rewards/rejected": -0.05617056414484978,
      "sft_loss": 0.010644225403666496,
      "step": 330
    },
    {
      "epoch": 9.777777777777779,
      "step": 330,
      "total_flos": 5.97337003547689e+16,
      "train_loss": 0.41489303653890436,
      "train_runtime": 721.1538,
      "train_samples_per_second": 7.488,
      "train_steps_per_second": 0.458
    }
  ],
  "logging_steps": 10,
  "max_steps": 330,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.97337003547689e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}