{ "best_metric": 0.09428545832633972, "best_model_checkpoint": "saves/Llama-3.1-8B-Instruct/lora/saa-600/checkpoint-250", "epoch": 9.777777777777779, "eval_steps": 50, "global_step": 330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2962962962962963, "grad_norm": 7.07548189163208, "learning_rate": 1.5151515151515152e-06, "logits/chosen": -0.4374169409275055, "logits/rejected": -0.5023793578147888, "logps/chosen": -1.741115927696228, "logps/rejected": -2.1606717109680176, "loss": 1.7946, "odds_ratio_loss": 15.69953727722168, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.17411158978939056, "rewards/margins": 0.04195558652281761, "rewards/rejected": -0.21606719493865967, "sft_loss": 0.22465327382087708, "step": 10 }, { "epoch": 0.5925925925925926, "grad_norm": 7.023080825805664, "learning_rate": 3.0303030303030305e-06, "logits/chosen": -0.42782774567604065, "logits/rejected": -0.48748907446861267, "logps/chosen": -1.7547874450683594, "logps/rejected": -2.1007962226867676, "loss": 1.8133, "odds_ratio_loss": 15.838772773742676, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1754787415266037, "rewards/margins": 0.034600891172885895, "rewards/rejected": -0.210079625248909, "sft_loss": 0.22939057648181915, "step": 20 }, { "epoch": 0.8888888888888888, "grad_norm": 8.079118728637695, "learning_rate": 4.5454545454545455e-06, "logits/chosen": -0.4177670478820801, "logits/rejected": -0.49106597900390625, "logps/chosen": -1.6719223260879517, "logps/rejected": -2.094174861907959, "loss": 1.725, "odds_ratio_loss": 15.089022636413574, "rewards/accuracies": 0.78125, "rewards/chosen": -0.1671922504901886, "rewards/margins": 0.042225271463394165, "rewards/rejected": -0.20941750705242157, "sft_loss": 0.21611404418945312, "step": 30 }, { "epoch": 1.1851851851851851, "grad_norm": 7.101726531982422, "learning_rate": 4.993149937871306e-06, "logits/chosen": -0.42014995217323303, "logits/rejected": -0.4878144860267639, "logps/chosen": -1.4801180362701416, "logps/rejected": -1.8868948221206665, "loss": 1.5344, "odds_ratio_loss": 13.476564407348633, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.14801180362701416, "rewards/margins": 0.040677666664123535, "rewards/rejected": -0.1886894851922989, "sft_loss": 0.18675227463245392, "step": 40 }, { "epoch": 1.4814814814814814, "grad_norm": 4.9273481369018555, "learning_rate": 4.959688949822748e-06, "logits/chosen": -0.4227227568626404, "logits/rejected": -0.4957185685634613, "logps/chosen": -1.2785080671310425, "logps/rejected": -1.6517393589019775, "loss": 1.3352, "odds_ratio_loss": 11.81715202331543, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12785081565380096, "rewards/margins": 0.03732311353087425, "rewards/rejected": -0.16517391800880432, "sft_loss": 0.15344038605690002, "step": 50 }, { "epoch": 1.4814814814814814, "eval_logits/chosen": -0.40017402172088623, "eval_logits/rejected": -0.4812173843383789, "eval_logps/chosen": -0.9889497756958008, "eval_logps/rejected": -1.5758014917373657, "eval_loss": 1.0316624641418457, "eval_odds_ratio_loss": 9.149198532104492, "eval_rewards/accuracies": 0.8333333134651184, "eval_rewards/chosen": -0.09889498353004456, "eval_rewards/margins": 0.058685168623924255, "eval_rewards/rejected": -0.1575801521539688, "eval_runtime": 2.3161, "eval_samples_per_second": 25.906, "eval_sft_loss": 0.11674254387617111, "eval_steps_per_second": 12.953, "step": 50 }, { "epoch": 1.7777777777777777, "grad_norm": 3.42924427986145, "learning_rate": 4.8987324340362445e-06, "logits/chosen": -0.4220319390296936, "logits/rejected": -0.4980909824371338, "logps/chosen": -0.89045250415802, "logps/rejected": -1.3505830764770508, "loss": 0.9359, "odds_ratio_loss": 8.349299430847168, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.08904524892568588, "rewards/margins": 0.04601306468248367, "rewards/rejected": -0.13505831360816956, "sft_loss": 0.10094638913869858, "step": 60 }, { "epoch": 2.074074074074074, "grad_norm": 3.1744749546051025, "learning_rate": 4.810961790316731e-06, "logits/chosen": -0.4295685291290283, "logits/rejected": -0.5065377950668335, "logps/chosen": -0.5825018882751465, "logps/rejected": -1.0753108263015747, "loss": 0.625, "odds_ratio_loss": 5.649188995361328, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.05825018882751465, "rewards/margins": 0.049280889332294464, "rewards/rejected": -0.10753107070922852, "sft_loss": 0.0600874125957489, "step": 70 }, { "epoch": 2.3703703703703702, "grad_norm": 1.9260555505752563, "learning_rate": 4.697358159051549e-06, "logits/chosen": -0.40925922989845276, "logits/rejected": -0.4894172251224518, "logps/chosen": -0.44294339418411255, "logps/rejected": -0.9772504568099976, "loss": 0.4878, "odds_ratio_loss": 4.406769275665283, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.0442943349480629, "rewards/margins": 0.053430717438459396, "rewards/rejected": -0.097725048661232, "sft_loss": 0.047148533165454865, "step": 80 }, { "epoch": 2.6666666666666665, "grad_norm": 2.225752115249634, "learning_rate": 4.559191453574582e-06, "logits/chosen": -0.3779674470424652, "logits/rejected": -0.4604215621948242, "logps/chosen": -0.28717148303985596, "logps/rejected": -0.7895299196243286, "loss": 0.332, "odds_ratio_loss": 3.0206964015960693, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.028717149049043655, "rewards/margins": 0.050235848873853683, "rewards/rejected": -0.07895299792289734, "sft_loss": 0.029953395947813988, "step": 90 }, { "epoch": 2.962962962962963, "grad_norm": 1.3990237712860107, "learning_rate": 4.398006164494358e-06, "logits/chosen": -0.4050057828426361, "logits/rejected": -0.4781204164028168, "logps/chosen": -0.19231440126895905, "logps/rejected": -0.5829997062683105, "loss": 0.2371, "odds_ratio_loss": 2.1702122688293457, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.019231440499424934, "rewards/margins": 0.03906853124499321, "rewards/rejected": -0.058299969881772995, "sft_loss": 0.02010512165725231, "step": 100 }, { "epoch": 2.962962962962963, "eval_logits/chosen": -0.38126423954963684, "eval_logits/rejected": -0.455107718706131, "eval_logps/chosen": -0.13484641909599304, "eval_logps/rejected": -0.6987236142158508, "eval_loss": 0.16552023589611053, "eval_odds_ratio_loss": 1.47817862033844, "eval_rewards/accuracies": 0.8833333253860474, "eval_rewards/chosen": -0.013484641909599304, "eval_rewards/margins": 0.05638771876692772, "eval_rewards/rejected": -0.06987235695123672, "eval_runtime": 2.3132, "eval_samples_per_second": 25.938, "eval_sft_loss": 0.01770237274467945, "eval_steps_per_second": 12.969, "step": 100 }, { "epoch": 3.259259259259259, "grad_norm": 1.6745034456253052, "learning_rate": 4.215604094671835e-06, "logits/chosen": -0.39228641986846924, "logits/rejected": -0.4650408625602722, "logps/chosen": -0.14995309710502625, "logps/rejected": -0.6513184309005737, "loss": 0.1933, "odds_ratio_loss": 1.7675580978393555, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.014995308592915535, "rewards/margins": 0.05013653635978699, "rewards/rejected": -0.06513184309005737, "sft_loss": 0.016546962782740593, "step": 110 }, { "epoch": 3.5555555555555554, "grad_norm": 2.232027053833008, "learning_rate": 4.014024217844167e-06, "logits/chosen": -0.3439103364944458, "logits/rejected": -0.41849011182785034, "logps/chosen": -0.1279471218585968, "logps/rejected": -0.5881286859512329, "loss": 0.1679, "odds_ratio_loss": 1.5120834112167358, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.012794713489711285, "rewards/margins": 0.04601815715432167, "rewards/rejected": -0.05881286785006523, "sft_loss": 0.01666136085987091, "step": 120 }, { "epoch": 3.851851851851852, "grad_norm": 1.1567457914352417, "learning_rate": 3.7955198860439892e-06, "logits/chosen": -0.4037134051322937, "logits/rejected": -0.4531572461128235, "logps/chosen": -0.10376612842082977, "logps/rejected": -0.5740376114845276, "loss": 0.1434, "odds_ratio_loss": 1.3169727325439453, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.010376612655818462, "rewards/margins": 0.04702714830636978, "rewards/rejected": -0.05740376561880112, "sft_loss": 0.011749515309929848, "step": 130 }, { "epoch": 4.148148148148148, "grad_norm": 0.8253294229507446, "learning_rate": 3.5625336406000752e-06, "logits/chosen": -0.41028180718421936, "logits/rejected": -0.46746310591697693, "logps/chosen": -0.09531185775995255, "logps/rejected": -0.5239280462265015, "loss": 0.1289, "odds_ratio_loss": 1.1706035137176514, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.00953118596225977, "rewards/margins": 0.04286161810159683, "rewards/rejected": -0.05239280313253403, "sft_loss": 0.011803574860095978, "step": 140 }, { "epoch": 4.444444444444445, "grad_norm": 0.9827601313591003, "learning_rate": 3.3176699082935546e-06, "logits/chosen": -0.3458485007286072, "logits/rejected": -0.4066559374332428, "logps/chosen": -0.10192994773387909, "logps/rejected": -0.5467253923416138, "loss": 0.1421, "odds_ratio_loss": 1.2931029796600342, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.010192994959652424, "rewards/margins": 0.04447954148054123, "rewards/rejected": -0.05467253923416138, "sft_loss": 0.01281714253127575, "step": 150 }, { "epoch": 4.444444444444445, "eval_logits/chosen": -0.34726279973983765, "eval_logits/rejected": -0.4106636047363281, "eval_logps/chosen": -0.0770278051495552, "eval_logps/rejected": -0.5773364901542664, "eval_loss": 0.10104309767484665, "eval_odds_ratio_loss": 0.8869253396987915, "eval_rewards/accuracies": 0.8833333253860474, "eval_rewards/chosen": -0.007702780421823263, "eval_rewards/margins": 0.05003087595105171, "eval_rewards/rejected": -0.05773365497589111, "eval_runtime": 2.316, "eval_samples_per_second": 25.907, "eval_sft_loss": 0.012350580655038357, "eval_steps_per_second": 12.953, "step": 150 }, { "epoch": 4.7407407407407405, "grad_norm": 1.6250287294387817, "learning_rate": 3.0636658878845116e-06, "logits/chosen": -0.38808631896972656, "logits/rejected": -0.45208558440208435, "logps/chosen": -0.10056424140930176, "logps/rejected": -0.5426880717277527, "loss": 0.1333, "odds_ratio_loss": 1.2175222635269165, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.01005642395466566, "rewards/margins": 0.04421238973736763, "rewards/rejected": -0.054268814623355865, "sft_loss": 0.011499151587486267, "step": 160 }, { "epoch": 5.037037037037037, "grad_norm": 1.2549740076065063, "learning_rate": 2.803360952452705e-06, "logits/chosen": -0.3857024013996124, "logits/rejected": -0.43612140417099, "logps/chosen": -0.08464725315570831, "logps/rejected": -0.4786381125450134, "loss": 0.1229, "odds_ratio_loss": 1.1305350065231323, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.008464725688099861, "rewards/margins": 0.03939909487962723, "rewards/rejected": -0.04786381870508194, "sft_loss": 0.00985820684581995, "step": 170 }, { "epoch": 5.333333333333333, "grad_norm": 2.602710485458374, "learning_rate": 2.53966490958702e-06, "logits/chosen": -0.32125982642173767, "logits/rejected": -0.3869190812110901, "logps/chosen": -0.0981438010931015, "logps/rejected": -0.6228185892105103, "loss": 0.1321, "odds_ratio_loss": 1.208888053894043, "rewards/accuracies": 0.84375, "rewards/chosen": -0.00981437973678112, "rewards/margins": 0.05246748402714729, "rewards/rejected": -0.062281858175992966, "sft_loss": 0.011162296868860722, "step": 180 }, { "epoch": 5.62962962962963, "grad_norm": 0.7487705945968628, "learning_rate": 2.275525474225771e-06, "logits/chosen": -0.38048022985458374, "logits/rejected": -0.45359840989112854, "logps/chosen": -0.08556422591209412, "logps/rejected": -0.568170428276062, "loss": 0.118, "odds_ratio_loss": 1.0685060024261475, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.008556422777473927, "rewards/margins": 0.04826062172651291, "rewards/rejected": -0.05681704729795456, "sft_loss": 0.011130120605230331, "step": 190 }, { "epoch": 5.925925925925926, "grad_norm": 1.8954200744628906, "learning_rate": 2.013895317751323e-06, "logits/chosen": -0.3612784445285797, "logits/rejected": -0.398723840713501, "logps/chosen": -0.09013709425926208, "logps/rejected": -0.47434768080711365, "loss": 0.1291, "odds_ratio_loss": 1.1944711208343506, "rewards/accuracies": 0.78125, "rewards/chosen": -0.009013709612190723, "rewards/margins": 0.03842105716466904, "rewards/rejected": -0.047434769570827484, "sft_loss": 0.00965641625225544, "step": 200 }, { "epoch": 5.925925925925926, "eval_logits/chosen": -0.3441879153251648, "eval_logits/rejected": -0.40659084916114807, "eval_logps/chosen": -0.07519559562206268, "eval_logps/rejected": -0.5936176776885986, "eval_loss": 0.09843841940164566, "eval_odds_ratio_loss": 0.8613345623016357, "eval_rewards/accuracies": 0.8833333253860474, "eval_rewards/chosen": -0.007519559469074011, "eval_rewards/margins": 0.05184221267700195, "eval_rewards/rejected": -0.0593617707490921, "eval_runtime": 2.3134, "eval_samples_per_second": 25.936, "eval_sft_loss": 0.012304977513849735, "eval_steps_per_second": 12.968, "step": 200 }, { "epoch": 6.222222222222222, "grad_norm": 2.2815189361572266, "learning_rate": 1.7576990616793139e-06, "logits/chosen": -0.3727927803993225, "logits/rejected": -0.4259300231933594, "logps/chosen": -0.06345033645629883, "logps/rejected": -0.5240000486373901, "loss": 0.0946, "odds_ratio_loss": 0.8643038868904114, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.006345034576952457, "rewards/margins": 0.04605497419834137, "rewards/rejected": -0.052400004118680954, "sft_loss": 0.008158734068274498, "step": 210 }, { "epoch": 6.518518518518518, "grad_norm": 1.3194066286087036, "learning_rate": 1.509800584902108e-06, "logits/chosen": -0.3670283854007721, "logits/rejected": -0.427605003118515, "logps/chosen": -0.09667733311653137, "logps/rejected": -0.5639557838439941, "loss": 0.1333, "odds_ratio_loss": 1.2246453762054443, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.009667733684182167, "rewards/margins": 0.04672784358263016, "rewards/rejected": -0.056395579129457474, "sft_loss": 0.010851002298295498, "step": 220 }, { "epoch": 6.814814814814815, "grad_norm": 1.5913020372390747, "learning_rate": 1.2729710099410802e-06, "logits/chosen": -0.3422110676765442, "logits/rejected": -0.41096681356430054, "logps/chosen": -0.07137643545866013, "logps/rejected": -0.5844155550003052, "loss": 0.0972, "odds_ratio_loss": 0.8859140276908875, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.007137644104659557, "rewards/margins": 0.05130390450358391, "rewards/rejected": -0.05844154953956604, "sft_loss": 0.008584940806031227, "step": 230 }, { "epoch": 7.111111111111111, "grad_norm": 1.441452980041504, "learning_rate": 1.049857726072005e-06, "logits/chosen": -0.37981483340263367, "logits/rejected": -0.42586684226989746, "logps/chosen": -0.09972624480724335, "logps/rejected": -0.5454004406929016, "loss": 0.1338, "odds_ratio_loss": 1.2288706302642822, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.009972624480724335, "rewards/margins": 0.044567424803972244, "rewards/rejected": -0.05454004928469658, "sft_loss": 0.010933582670986652, "step": 240 }, { "epoch": 7.407407407407407, "grad_norm": 2.5117592811584473, "learning_rate": 8.4295479559726e-07, "logits/chosen": -0.38271045684814453, "logits/rejected": -0.4315881133079529, "logps/chosen": -0.0871758908033371, "logps/rejected": -0.5703214406967163, "loss": 0.1246, "odds_ratio_loss": 1.1464191675186157, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.00871758908033371, "rewards/margins": 0.04831455647945404, "rewards/rejected": -0.05703214555978775, "sft_loss": 0.009948917664587498, "step": 250 }, { "epoch": 7.407407407407407, "eval_logits/chosen": -0.34323057532310486, "eval_logits/rejected": -0.4047623574733734, "eval_logps/chosen": -0.07215116173028946, "eval_logps/rejected": -0.6233159303665161, "eval_loss": 0.09428545832633972, "eval_odds_ratio_loss": 0.8242944478988647, "eval_rewards/accuracies": 0.8833333253860474, "eval_rewards/chosen": -0.0072151171043515205, "eval_rewards/margins": 0.05511648207902908, "eval_rewards/rejected": -0.06233159825205803, "eval_runtime": 2.3121, "eval_samples_per_second": 25.95, "eval_sft_loss": 0.01185599621385336, "eval_steps_per_second": 12.975, "step": 250 }, { "epoch": 7.703703703703704, "grad_norm": 1.5904881954193115, "learning_rate": 6.545750740770338e-07, "logits/chosen": -0.3598392605781555, "logits/rejected": -0.423635333776474, "logps/chosen": -0.06470540165901184, "logps/rejected": -0.5819328427314758, "loss": 0.0906, "odds_ratio_loss": 0.8195532560348511, "rewards/accuracies": 0.84375, "rewards/chosen": -0.006470539607107639, "rewards/margins": 0.05172274261713028, "rewards/rejected": -0.05819328501820564, "sft_loss": 0.008671595714986324, "step": 260 }, { "epoch": 8.0, "grad_norm": 0.6165652871131897, "learning_rate": 4.868243561723535e-07, "logits/chosen": -0.34859612584114075, "logits/rejected": -0.4086515009403229, "logps/chosen": -0.09018560498952866, "logps/rejected": -0.587788462638855, "loss": 0.1201, "odds_ratio_loss": 1.098928689956665, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.009018560871481895, "rewards/margins": 0.04976029321551323, "rewards/rejected": -0.058778852224349976, "sft_loss": 0.010236375033855438, "step": 270 }, { "epoch": 8.296296296296296, "grad_norm": 1.252172589302063, "learning_rate": 3.4157783610952263e-07, "logits/chosen": -0.3684031367301941, "logits/rejected": -0.4260830283164978, "logps/chosen": -0.0856148824095726, "logps/rejected": -0.5833510756492615, "loss": 0.1153, "odds_ratio_loss": 1.06239914894104, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.008561487309634686, "rewards/margins": 0.049773626029491425, "rewards/rejected": -0.058335114270448685, "sft_loss": 0.009099993854761124, "step": 280 }, { "epoch": 8.592592592592592, "grad_norm": 1.9929240942001343, "learning_rate": 2.2045914590165252e-07, "logits/chosen": -0.4020005166530609, "logits/rejected": -0.46092405915260315, "logps/chosen": -0.07739080488681793, "logps/rejected": -0.62553870677948, "loss": 0.1022, "odds_ratio_loss": 0.9326642155647278, "rewards/accuracies": 0.84375, "rewards/chosen": -0.007739080581814051, "rewards/margins": 0.054814793169498444, "rewards/rejected": -0.06255386769771576, "sft_loss": 0.008896315470337868, "step": 290 }, { "epoch": 8.88888888888889, "grad_norm": 0.9893295764923096, "learning_rate": 1.2482220564763669e-07, "logits/chosen": -0.34586095809936523, "logits/rejected": -0.39634814858436584, "logps/chosen": -0.07016898691654205, "logps/rejected": -0.5465742349624634, "loss": 0.1045, "odds_ratio_loss": 0.9643081426620483, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.0070168995298445225, "rewards/margins": 0.04764052852988243, "rewards/rejected": -0.054657429456710815, "sft_loss": 0.008084597066044807, "step": 300 }, { "epoch": 8.88888888888889, "eval_logits/chosen": -0.34323617815971375, "eval_logits/rejected": -0.40456622838974, "eval_logps/chosen": -0.07242080569267273, "eval_logps/rejected": -0.6277292966842651, "eval_loss": 0.09481088072061539, "eval_odds_ratio_loss": 0.829154908657074, "eval_rewards/accuracies": 0.8833333253860474, "eval_rewards/chosen": -0.007242080755531788, "eval_rewards/margins": 0.0555308535695076, "eval_rewards/rejected": -0.06277292966842651, "eval_runtime": 2.3109, "eval_samples_per_second": 25.964, "eval_sft_loss": 0.011895372532308102, "eval_steps_per_second": 12.982, "step": 300 }, { "epoch": 9.185185185185185, "grad_norm": 1.7534313201904297, "learning_rate": 5.573608879422876e-08, "logits/chosen": -0.35441476106643677, "logits/rejected": -0.4039112627506256, "logps/chosen": -0.08877753466367722, "logps/rejected": -0.5493366718292236, "loss": 0.1254, "odds_ratio_loss": 1.14632248878479, "rewards/accuracies": 0.8125, "rewards/chosen": -0.008877754211425781, "rewards/margins": 0.046055909246206284, "rewards/rejected": -0.05493366718292236, "sft_loss": 0.01074306946247816, "step": 310 }, { "epoch": 9.481481481481481, "grad_norm": 1.6547372341156006, "learning_rate": 1.3973071544233219e-08, "logits/chosen": -0.37551018595695496, "logits/rejected": -0.4359508454799652, "logps/chosen": -0.07044418156147003, "logps/rejected": -0.5796228647232056, "loss": 0.098, "odds_ratio_loss": 0.9046875834465027, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.007044418249279261, "rewards/margins": 0.05091787129640579, "rewards/rejected": -0.05796227976679802, "sft_loss": 0.007553444243967533, "step": 320 }, { "epoch": 9.777777777777779, "grad_norm": 1.2167924642562866, "learning_rate": 0.0, "logits/chosen": -0.36357811093330383, "logits/rejected": -0.4220617711544037, "logps/chosen": -0.0854811817407608, "logps/rejected": -0.5617056488990784, "loss": 0.1206, "odds_ratio_loss": 1.0994065999984741, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.008548117242753506, "rewards/margins": 0.047622449696063995, "rewards/rejected": -0.05617056414484978, "sft_loss": 0.010644225403666496, "step": 330 }, { "epoch": 9.777777777777779, "step": 330, "total_flos": 5.97337003547689e+16, "train_loss": 0.41489303653890436, "train_runtime": 721.1538, "train_samples_per_second": 7.488, "train_steps_per_second": 0.458 } ], "logging_steps": 10, "max_steps": 330, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.97337003547689e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }