Llama-3.1-8B-Instruct-SAA-600 / trainer_state.json
chchen's picture
End of training
680afe0 verified
{
"best_metric": 0.09428545832633972,
"best_model_checkpoint": "saves/Llama-3.1-8B-Instruct/lora/saa-600/checkpoint-250",
"epoch": 9.777777777777779,
"eval_steps": 50,
"global_step": 330,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2962962962962963,
"grad_norm": 7.07548189163208,
"learning_rate": 1.5151515151515152e-06,
"logits/chosen": -0.4374169409275055,
"logits/rejected": -0.5023793578147888,
"logps/chosen": -1.741115927696228,
"logps/rejected": -2.1606717109680176,
"loss": 1.7946,
"odds_ratio_loss": 15.69953727722168,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.17411158978939056,
"rewards/margins": 0.04195558652281761,
"rewards/rejected": -0.21606719493865967,
"sft_loss": 0.22465327382087708,
"step": 10
},
{
"epoch": 0.5925925925925926,
"grad_norm": 7.023080825805664,
"learning_rate": 3.0303030303030305e-06,
"logits/chosen": -0.42782774567604065,
"logits/rejected": -0.48748907446861267,
"logps/chosen": -1.7547874450683594,
"logps/rejected": -2.1007962226867676,
"loss": 1.8133,
"odds_ratio_loss": 15.838772773742676,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1754787415266037,
"rewards/margins": 0.034600891172885895,
"rewards/rejected": -0.210079625248909,
"sft_loss": 0.22939057648181915,
"step": 20
},
{
"epoch": 0.8888888888888888,
"grad_norm": 8.079118728637695,
"learning_rate": 4.5454545454545455e-06,
"logits/chosen": -0.4177670478820801,
"logits/rejected": -0.49106597900390625,
"logps/chosen": -1.6719223260879517,
"logps/rejected": -2.094174861907959,
"loss": 1.725,
"odds_ratio_loss": 15.089022636413574,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.1671922504901886,
"rewards/margins": 0.042225271463394165,
"rewards/rejected": -0.20941750705242157,
"sft_loss": 0.21611404418945312,
"step": 30
},
{
"epoch": 1.1851851851851851,
"grad_norm": 7.101726531982422,
"learning_rate": 4.993149937871306e-06,
"logits/chosen": -0.42014995217323303,
"logits/rejected": -0.4878144860267639,
"logps/chosen": -1.4801180362701416,
"logps/rejected": -1.8868948221206665,
"loss": 1.5344,
"odds_ratio_loss": 13.476564407348633,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.14801180362701416,
"rewards/margins": 0.040677666664123535,
"rewards/rejected": -0.1886894851922989,
"sft_loss": 0.18675227463245392,
"step": 40
},
{
"epoch": 1.4814814814814814,
"grad_norm": 4.9273481369018555,
"learning_rate": 4.959688949822748e-06,
"logits/chosen": -0.4227227568626404,
"logits/rejected": -0.4957185685634613,
"logps/chosen": -1.2785080671310425,
"logps/rejected": -1.6517393589019775,
"loss": 1.3352,
"odds_ratio_loss": 11.81715202331543,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.12785081565380096,
"rewards/margins": 0.03732311353087425,
"rewards/rejected": -0.16517391800880432,
"sft_loss": 0.15344038605690002,
"step": 50
},
{
"epoch": 1.4814814814814814,
"eval_logits/chosen": -0.40017402172088623,
"eval_logits/rejected": -0.4812173843383789,
"eval_logps/chosen": -0.9889497756958008,
"eval_logps/rejected": -1.5758014917373657,
"eval_loss": 1.0316624641418457,
"eval_odds_ratio_loss": 9.149198532104492,
"eval_rewards/accuracies": 0.8333333134651184,
"eval_rewards/chosen": -0.09889498353004456,
"eval_rewards/margins": 0.058685168623924255,
"eval_rewards/rejected": -0.1575801521539688,
"eval_runtime": 2.3161,
"eval_samples_per_second": 25.906,
"eval_sft_loss": 0.11674254387617111,
"eval_steps_per_second": 12.953,
"step": 50
},
{
"epoch": 1.7777777777777777,
"grad_norm": 3.42924427986145,
"learning_rate": 4.8987324340362445e-06,
"logits/chosen": -0.4220319390296936,
"logits/rejected": -0.4980909824371338,
"logps/chosen": -0.89045250415802,
"logps/rejected": -1.3505830764770508,
"loss": 0.9359,
"odds_ratio_loss": 8.349299430847168,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.08904524892568588,
"rewards/margins": 0.04601306468248367,
"rewards/rejected": -0.13505831360816956,
"sft_loss": 0.10094638913869858,
"step": 60
},
{
"epoch": 2.074074074074074,
"grad_norm": 3.1744749546051025,
"learning_rate": 4.810961790316731e-06,
"logits/chosen": -0.4295685291290283,
"logits/rejected": -0.5065377950668335,
"logps/chosen": -0.5825018882751465,
"logps/rejected": -1.0753108263015747,
"loss": 0.625,
"odds_ratio_loss": 5.649188995361328,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.05825018882751465,
"rewards/margins": 0.049280889332294464,
"rewards/rejected": -0.10753107070922852,
"sft_loss": 0.0600874125957489,
"step": 70
},
{
"epoch": 2.3703703703703702,
"grad_norm": 1.9260555505752563,
"learning_rate": 4.697358159051549e-06,
"logits/chosen": -0.40925922989845276,
"logits/rejected": -0.4894172251224518,
"logps/chosen": -0.44294339418411255,
"logps/rejected": -0.9772504568099976,
"loss": 0.4878,
"odds_ratio_loss": 4.406769275665283,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": -0.0442943349480629,
"rewards/margins": 0.053430717438459396,
"rewards/rejected": -0.097725048661232,
"sft_loss": 0.047148533165454865,
"step": 80
},
{
"epoch": 2.6666666666666665,
"grad_norm": 2.225752115249634,
"learning_rate": 4.559191453574582e-06,
"logits/chosen": -0.3779674470424652,
"logits/rejected": -0.4604215621948242,
"logps/chosen": -0.28717148303985596,
"logps/rejected": -0.7895299196243286,
"loss": 0.332,
"odds_ratio_loss": 3.0206964015960693,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.028717149049043655,
"rewards/margins": 0.050235848873853683,
"rewards/rejected": -0.07895299792289734,
"sft_loss": 0.029953395947813988,
"step": 90
},
{
"epoch": 2.962962962962963,
"grad_norm": 1.3990237712860107,
"learning_rate": 4.398006164494358e-06,
"logits/chosen": -0.4050057828426361,
"logits/rejected": -0.4781204164028168,
"logps/chosen": -0.19231440126895905,
"logps/rejected": -0.5829997062683105,
"loss": 0.2371,
"odds_ratio_loss": 2.1702122688293457,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.019231440499424934,
"rewards/margins": 0.03906853124499321,
"rewards/rejected": -0.058299969881772995,
"sft_loss": 0.02010512165725231,
"step": 100
},
{
"epoch": 2.962962962962963,
"eval_logits/chosen": -0.38126423954963684,
"eval_logits/rejected": -0.455107718706131,
"eval_logps/chosen": -0.13484641909599304,
"eval_logps/rejected": -0.6987236142158508,
"eval_loss": 0.16552023589611053,
"eval_odds_ratio_loss": 1.47817862033844,
"eval_rewards/accuracies": 0.8833333253860474,
"eval_rewards/chosen": -0.013484641909599304,
"eval_rewards/margins": 0.05638771876692772,
"eval_rewards/rejected": -0.06987235695123672,
"eval_runtime": 2.3132,
"eval_samples_per_second": 25.938,
"eval_sft_loss": 0.01770237274467945,
"eval_steps_per_second": 12.969,
"step": 100
},
{
"epoch": 3.259259259259259,
"grad_norm": 1.6745034456253052,
"learning_rate": 4.215604094671835e-06,
"logits/chosen": -0.39228641986846924,
"logits/rejected": -0.4650408625602722,
"logps/chosen": -0.14995309710502625,
"logps/rejected": -0.6513184309005737,
"loss": 0.1933,
"odds_ratio_loss": 1.7675580978393555,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.014995308592915535,
"rewards/margins": 0.05013653635978699,
"rewards/rejected": -0.06513184309005737,
"sft_loss": 0.016546962782740593,
"step": 110
},
{
"epoch": 3.5555555555555554,
"grad_norm": 2.232027053833008,
"learning_rate": 4.014024217844167e-06,
"logits/chosen": -0.3439103364944458,
"logits/rejected": -0.41849011182785034,
"logps/chosen": -0.1279471218585968,
"logps/rejected": -0.5881286859512329,
"loss": 0.1679,
"odds_ratio_loss": 1.5120834112167358,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.012794713489711285,
"rewards/margins": 0.04601815715432167,
"rewards/rejected": -0.05881286785006523,
"sft_loss": 0.01666136085987091,
"step": 120
},
{
"epoch": 3.851851851851852,
"grad_norm": 1.1567457914352417,
"learning_rate": 3.7955198860439892e-06,
"logits/chosen": -0.4037134051322937,
"logits/rejected": -0.4531572461128235,
"logps/chosen": -0.10376612842082977,
"logps/rejected": -0.5740376114845276,
"loss": 0.1434,
"odds_ratio_loss": 1.3169727325439453,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.010376612655818462,
"rewards/margins": 0.04702714830636978,
"rewards/rejected": -0.05740376561880112,
"sft_loss": 0.011749515309929848,
"step": 130
},
{
"epoch": 4.148148148148148,
"grad_norm": 0.8253294229507446,
"learning_rate": 3.5625336406000752e-06,
"logits/chosen": -0.41028180718421936,
"logits/rejected": -0.46746310591697693,
"logps/chosen": -0.09531185775995255,
"logps/rejected": -0.5239280462265015,
"loss": 0.1289,
"odds_ratio_loss": 1.1706035137176514,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.00953118596225977,
"rewards/margins": 0.04286161810159683,
"rewards/rejected": -0.05239280313253403,
"sft_loss": 0.011803574860095978,
"step": 140
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.9827601313591003,
"learning_rate": 3.3176699082935546e-06,
"logits/chosen": -0.3458485007286072,
"logits/rejected": -0.4066559374332428,
"logps/chosen": -0.10192994773387909,
"logps/rejected": -0.5467253923416138,
"loss": 0.1421,
"odds_ratio_loss": 1.2931029796600342,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.010192994959652424,
"rewards/margins": 0.04447954148054123,
"rewards/rejected": -0.05467253923416138,
"sft_loss": 0.01281714253127575,
"step": 150
},
{
"epoch": 4.444444444444445,
"eval_logits/chosen": -0.34726279973983765,
"eval_logits/rejected": -0.4106636047363281,
"eval_logps/chosen": -0.0770278051495552,
"eval_logps/rejected": -0.5773364901542664,
"eval_loss": 0.10104309767484665,
"eval_odds_ratio_loss": 0.8869253396987915,
"eval_rewards/accuracies": 0.8833333253860474,
"eval_rewards/chosen": -0.007702780421823263,
"eval_rewards/margins": 0.05003087595105171,
"eval_rewards/rejected": -0.05773365497589111,
"eval_runtime": 2.316,
"eval_samples_per_second": 25.907,
"eval_sft_loss": 0.012350580655038357,
"eval_steps_per_second": 12.953,
"step": 150
},
{
"epoch": 4.7407407407407405,
"grad_norm": 1.6250287294387817,
"learning_rate": 3.0636658878845116e-06,
"logits/chosen": -0.38808631896972656,
"logits/rejected": -0.45208558440208435,
"logps/chosen": -0.10056424140930176,
"logps/rejected": -0.5426880717277527,
"loss": 0.1333,
"odds_ratio_loss": 1.2175222635269165,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.01005642395466566,
"rewards/margins": 0.04421238973736763,
"rewards/rejected": -0.054268814623355865,
"sft_loss": 0.011499151587486267,
"step": 160
},
{
"epoch": 5.037037037037037,
"grad_norm": 1.2549740076065063,
"learning_rate": 2.803360952452705e-06,
"logits/chosen": -0.3857024013996124,
"logits/rejected": -0.43612140417099,
"logps/chosen": -0.08464725315570831,
"logps/rejected": -0.4786381125450134,
"loss": 0.1229,
"odds_ratio_loss": 1.1305350065231323,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.008464725688099861,
"rewards/margins": 0.03939909487962723,
"rewards/rejected": -0.04786381870508194,
"sft_loss": 0.00985820684581995,
"step": 170
},
{
"epoch": 5.333333333333333,
"grad_norm": 2.602710485458374,
"learning_rate": 2.53966490958702e-06,
"logits/chosen": -0.32125982642173767,
"logits/rejected": -0.3869190812110901,
"logps/chosen": -0.0981438010931015,
"logps/rejected": -0.6228185892105103,
"loss": 0.1321,
"odds_ratio_loss": 1.208888053894043,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.00981437973678112,
"rewards/margins": 0.05246748402714729,
"rewards/rejected": -0.062281858175992966,
"sft_loss": 0.011162296868860722,
"step": 180
},
{
"epoch": 5.62962962962963,
"grad_norm": 0.7487705945968628,
"learning_rate": 2.275525474225771e-06,
"logits/chosen": -0.38048022985458374,
"logits/rejected": -0.45359840989112854,
"logps/chosen": -0.08556422591209412,
"logps/rejected": -0.568170428276062,
"loss": 0.118,
"odds_ratio_loss": 1.0685060024261475,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.008556422777473927,
"rewards/margins": 0.04826062172651291,
"rewards/rejected": -0.05681704729795456,
"sft_loss": 0.011130120605230331,
"step": 190
},
{
"epoch": 5.925925925925926,
"grad_norm": 1.8954200744628906,
"learning_rate": 2.013895317751323e-06,
"logits/chosen": -0.3612784445285797,
"logits/rejected": -0.398723840713501,
"logps/chosen": -0.09013709425926208,
"logps/rejected": -0.47434768080711365,
"loss": 0.1291,
"odds_ratio_loss": 1.1944711208343506,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.009013709612190723,
"rewards/margins": 0.03842105716466904,
"rewards/rejected": -0.047434769570827484,
"sft_loss": 0.00965641625225544,
"step": 200
},
{
"epoch": 5.925925925925926,
"eval_logits/chosen": -0.3441879153251648,
"eval_logits/rejected": -0.40659084916114807,
"eval_logps/chosen": -0.07519559562206268,
"eval_logps/rejected": -0.5936176776885986,
"eval_loss": 0.09843841940164566,
"eval_odds_ratio_loss": 0.8613345623016357,
"eval_rewards/accuracies": 0.8833333253860474,
"eval_rewards/chosen": -0.007519559469074011,
"eval_rewards/margins": 0.05184221267700195,
"eval_rewards/rejected": -0.0593617707490921,
"eval_runtime": 2.3134,
"eval_samples_per_second": 25.936,
"eval_sft_loss": 0.012304977513849735,
"eval_steps_per_second": 12.968,
"step": 200
},
{
"epoch": 6.222222222222222,
"grad_norm": 2.2815189361572266,
"learning_rate": 1.7576990616793139e-06,
"logits/chosen": -0.3727927803993225,
"logits/rejected": -0.4259300231933594,
"logps/chosen": -0.06345033645629883,
"logps/rejected": -0.5240000486373901,
"loss": 0.0946,
"odds_ratio_loss": 0.8643038868904114,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.006345034576952457,
"rewards/margins": 0.04605497419834137,
"rewards/rejected": -0.052400004118680954,
"sft_loss": 0.008158734068274498,
"step": 210
},
{
"epoch": 6.518518518518518,
"grad_norm": 1.3194066286087036,
"learning_rate": 1.509800584902108e-06,
"logits/chosen": -0.3670283854007721,
"logits/rejected": -0.427605003118515,
"logps/chosen": -0.09667733311653137,
"logps/rejected": -0.5639557838439941,
"loss": 0.1333,
"odds_ratio_loss": 1.2246453762054443,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.009667733684182167,
"rewards/margins": 0.04672784358263016,
"rewards/rejected": -0.056395579129457474,
"sft_loss": 0.010851002298295498,
"step": 220
},
{
"epoch": 6.814814814814815,
"grad_norm": 1.5913020372390747,
"learning_rate": 1.2729710099410802e-06,
"logits/chosen": -0.3422110676765442,
"logits/rejected": -0.41096681356430054,
"logps/chosen": -0.07137643545866013,
"logps/rejected": -0.5844155550003052,
"loss": 0.0972,
"odds_ratio_loss": 0.8859140276908875,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": -0.007137644104659557,
"rewards/margins": 0.05130390450358391,
"rewards/rejected": -0.05844154953956604,
"sft_loss": 0.008584940806031227,
"step": 230
},
{
"epoch": 7.111111111111111,
"grad_norm": 1.441452980041504,
"learning_rate": 1.049857726072005e-06,
"logits/chosen": -0.37981483340263367,
"logits/rejected": -0.42586684226989746,
"logps/chosen": -0.09972624480724335,
"logps/rejected": -0.5454004406929016,
"loss": 0.1338,
"odds_ratio_loss": 1.2288706302642822,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.009972624480724335,
"rewards/margins": 0.044567424803972244,
"rewards/rejected": -0.05454004928469658,
"sft_loss": 0.010933582670986652,
"step": 240
},
{
"epoch": 7.407407407407407,
"grad_norm": 2.5117592811584473,
"learning_rate": 8.4295479559726e-07,
"logits/chosen": -0.38271045684814453,
"logits/rejected": -0.4315881133079529,
"logps/chosen": -0.0871758908033371,
"logps/rejected": -0.5703214406967163,
"loss": 0.1246,
"odds_ratio_loss": 1.1464191675186157,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.00871758908033371,
"rewards/margins": 0.04831455647945404,
"rewards/rejected": -0.05703214555978775,
"sft_loss": 0.009948917664587498,
"step": 250
},
{
"epoch": 7.407407407407407,
"eval_logits/chosen": -0.34323057532310486,
"eval_logits/rejected": -0.4047623574733734,
"eval_logps/chosen": -0.07215116173028946,
"eval_logps/rejected": -0.6233159303665161,
"eval_loss": 0.09428545832633972,
"eval_odds_ratio_loss": 0.8242944478988647,
"eval_rewards/accuracies": 0.8833333253860474,
"eval_rewards/chosen": -0.0072151171043515205,
"eval_rewards/margins": 0.05511648207902908,
"eval_rewards/rejected": -0.06233159825205803,
"eval_runtime": 2.3121,
"eval_samples_per_second": 25.95,
"eval_sft_loss": 0.01185599621385336,
"eval_steps_per_second": 12.975,
"step": 250
},
{
"epoch": 7.703703703703704,
"grad_norm": 1.5904881954193115,
"learning_rate": 6.545750740770338e-07,
"logits/chosen": -0.3598392605781555,
"logits/rejected": -0.423635333776474,
"logps/chosen": -0.06470540165901184,
"logps/rejected": -0.5819328427314758,
"loss": 0.0906,
"odds_ratio_loss": 0.8195532560348511,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.006470539607107639,
"rewards/margins": 0.05172274261713028,
"rewards/rejected": -0.05819328501820564,
"sft_loss": 0.008671595714986324,
"step": 260
},
{
"epoch": 8.0,
"grad_norm": 0.6165652871131897,
"learning_rate": 4.868243561723535e-07,
"logits/chosen": -0.34859612584114075,
"logits/rejected": -0.4086515009403229,
"logps/chosen": -0.09018560498952866,
"logps/rejected": -0.587788462638855,
"loss": 0.1201,
"odds_ratio_loss": 1.098928689956665,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.009018560871481895,
"rewards/margins": 0.04976029321551323,
"rewards/rejected": -0.058778852224349976,
"sft_loss": 0.010236375033855438,
"step": 270
},
{
"epoch": 8.296296296296296,
"grad_norm": 1.252172589302063,
"learning_rate": 3.4157783610952263e-07,
"logits/chosen": -0.3684031367301941,
"logits/rejected": -0.4260830283164978,
"logps/chosen": -0.0856148824095726,
"logps/rejected": -0.5833510756492615,
"loss": 0.1153,
"odds_ratio_loss": 1.06239914894104,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.008561487309634686,
"rewards/margins": 0.049773626029491425,
"rewards/rejected": -0.058335114270448685,
"sft_loss": 0.009099993854761124,
"step": 280
},
{
"epoch": 8.592592592592592,
"grad_norm": 1.9929240942001343,
"learning_rate": 2.2045914590165252e-07,
"logits/chosen": -0.4020005166530609,
"logits/rejected": -0.46092405915260315,
"logps/chosen": -0.07739080488681793,
"logps/rejected": -0.62553870677948,
"loss": 0.1022,
"odds_ratio_loss": 0.9326642155647278,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.007739080581814051,
"rewards/margins": 0.054814793169498444,
"rewards/rejected": -0.06255386769771576,
"sft_loss": 0.008896315470337868,
"step": 290
},
{
"epoch": 8.88888888888889,
"grad_norm": 0.9893295764923096,
"learning_rate": 1.2482220564763669e-07,
"logits/chosen": -0.34586095809936523,
"logits/rejected": -0.39634814858436584,
"logps/chosen": -0.07016898691654205,
"logps/rejected": -0.5465742349624634,
"loss": 0.1045,
"odds_ratio_loss": 0.9643081426620483,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.0070168995298445225,
"rewards/margins": 0.04764052852988243,
"rewards/rejected": -0.054657429456710815,
"sft_loss": 0.008084597066044807,
"step": 300
},
{
"epoch": 8.88888888888889,
"eval_logits/chosen": -0.34323617815971375,
"eval_logits/rejected": -0.40456622838974,
"eval_logps/chosen": -0.07242080569267273,
"eval_logps/rejected": -0.6277292966842651,
"eval_loss": 0.09481088072061539,
"eval_odds_ratio_loss": 0.829154908657074,
"eval_rewards/accuracies": 0.8833333253860474,
"eval_rewards/chosen": -0.007242080755531788,
"eval_rewards/margins": 0.0555308535695076,
"eval_rewards/rejected": -0.06277292966842651,
"eval_runtime": 2.3109,
"eval_samples_per_second": 25.964,
"eval_sft_loss": 0.011895372532308102,
"eval_steps_per_second": 12.982,
"step": 300
},
{
"epoch": 9.185185185185185,
"grad_norm": 1.7534313201904297,
"learning_rate": 5.573608879422876e-08,
"logits/chosen": -0.35441476106643677,
"logits/rejected": -0.4039112627506256,
"logps/chosen": -0.08877753466367722,
"logps/rejected": -0.5493366718292236,
"loss": 0.1254,
"odds_ratio_loss": 1.14632248878479,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.008877754211425781,
"rewards/margins": 0.046055909246206284,
"rewards/rejected": -0.05493366718292236,
"sft_loss": 0.01074306946247816,
"step": 310
},
{
"epoch": 9.481481481481481,
"grad_norm": 1.6547372341156006,
"learning_rate": 1.3973071544233219e-08,
"logits/chosen": -0.37551018595695496,
"logits/rejected": -0.4359508454799652,
"logps/chosen": -0.07044418156147003,
"logps/rejected": -0.5796228647232056,
"loss": 0.098,
"odds_ratio_loss": 0.9046875834465027,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.007044418249279261,
"rewards/margins": 0.05091787129640579,
"rewards/rejected": -0.05796227976679802,
"sft_loss": 0.007553444243967533,
"step": 320
},
{
"epoch": 9.777777777777779,
"grad_norm": 1.2167924642562866,
"learning_rate": 0.0,
"logits/chosen": -0.36357811093330383,
"logits/rejected": -0.4220617711544037,
"logps/chosen": -0.0854811817407608,
"logps/rejected": -0.5617056488990784,
"loss": 0.1206,
"odds_ratio_loss": 1.0994065999984741,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.008548117242753506,
"rewards/margins": 0.047622449696063995,
"rewards/rejected": -0.05617056414484978,
"sft_loss": 0.010644225403666496,
"step": 330
},
{
"epoch": 9.777777777777779,
"step": 330,
"total_flos": 5.97337003547689e+16,
"train_loss": 0.41489303653890436,
"train_runtime": 721.1538,
"train_samples_per_second": 7.488,
"train_steps_per_second": 0.458
}
],
"logging_steps": 10,
"max_steps": 330,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.97337003547689e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}