Qwen2-1.5B-ORPO / last-checkpoint /trainer_state.json
iamnguyen's picture
Training in progress, step 44, checkpoint
f8624af verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.06428783416660959,
"eval_steps": 500,
"global_step": 44,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001461087140150218,
"grad_norm": 2.4780876636505127,
"learning_rate": 1.4285714285714286e-06,
"log_odds_chosen": 0.016244888305664062,
"log_odds_ratio": -0.6993749141693115,
"logits/chosen": -2.2119967937469482,
"logits/rejected": -2.212354898452759,
"logps/chosen": -1.583775520324707,
"logps/rejected": -1.594750165939331,
"loss": 1.8606,
"nll_loss": 1.7906217575073242,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.1583775281906128,
"rewards/margins": 0.0010974762262776494,
"rewards/rejected": -0.15947501361370087,
"step": 1
},
{
"epoch": 0.002922174280300436,
"grad_norm": 2.627044200897217,
"learning_rate": 2.8571428571428573e-06,
"log_odds_chosen": 0.11979679018259048,
"log_odds_ratio": -0.6573244333267212,
"logits/chosen": -2.2274065017700195,
"logits/rejected": -2.243682622909546,
"logps/chosen": -1.5664572715759277,
"logps/rejected": -1.6658927202224731,
"loss": 1.8544,
"nll_loss": 1.7886956930160522,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.156645730137825,
"rewards/margins": 0.009943531826138496,
"rewards/rejected": -0.16658926010131836,
"step": 2
},
{
"epoch": 0.004383261420450654,
"grad_norm": 2.113027334213257,
"learning_rate": 4.2857142857142855e-06,
"log_odds_chosen": 0.058733537793159485,
"log_odds_ratio": -0.6822808980941772,
"logits/chosen": -2.1965179443359375,
"logits/rejected": -2.195549964904785,
"logps/chosen": -1.4528911113739014,
"logps/rejected": -1.498779296875,
"loss": 1.7314,
"nll_loss": 1.6631801128387451,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.1452891230583191,
"rewards/margins": 0.004588826093822718,
"rewards/rejected": -0.14987793564796448,
"step": 3
},
{
"epoch": 0.005844348560600872,
"grad_norm": 1.8004069328308105,
"learning_rate": 5.7142857142857145e-06,
"log_odds_chosen": 0.09058406949043274,
"log_odds_ratio": -0.6622194051742554,
"logits/chosen": -2.240548849105835,
"logits/rejected": -2.276327133178711,
"logps/chosen": -1.469621181488037,
"logps/rejected": -1.5354365110397339,
"loss": 1.8061,
"nll_loss": 1.7399120330810547,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.14696213603019714,
"rewards/margins": 0.006581515539437532,
"rewards/rejected": -0.1535436362028122,
"step": 4
},
{
"epoch": 0.00730543570075109,
"grad_norm": 1.565532922744751,
"learning_rate": 7.1428571428571436e-06,
"log_odds_chosen": 0.052982207387685776,
"log_odds_ratio": -0.6897823214530945,
"logits/chosen": -2.2151541709899902,
"logits/rejected": -2.215179681777954,
"logps/chosen": -1.5283693075180054,
"logps/rejected": -1.5738036632537842,
"loss": 1.8656,
"nll_loss": 1.7966063022613525,
"rewards/accuracies": 0.53125,
"rewards/chosen": -0.15283691883087158,
"rewards/margins": 0.004543437156826258,
"rewards/rejected": -0.1573803573846817,
"step": 5
},
{
"epoch": 0.008766522840901307,
"grad_norm": 1.689719557762146,
"learning_rate": 8.571428571428571e-06,
"log_odds_chosen": -0.021141668781638145,
"log_odds_ratio": -0.7178523540496826,
"logits/chosen": -2.180445432662964,
"logits/rejected": -2.216461658477783,
"logps/chosen": -1.423187255859375,
"logps/rejected": -1.4068344831466675,
"loss": 1.7435,
"nll_loss": 1.671682357788086,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.1423187255859375,
"rewards/margins": -0.0016352771781384945,
"rewards/rejected": -0.14068344235420227,
"step": 6
},
{
"epoch": 0.010227609981051527,
"grad_norm": 1.432656168937683,
"learning_rate": 1e-05,
"log_odds_chosen": -0.045114632695913315,
"log_odds_ratio": -0.7312092781066895,
"logits/chosen": -2.2011773586273193,
"logits/rejected": -2.201080799102783,
"logps/chosen": -1.5241130590438843,
"logps/rejected": -1.4893730878829956,
"loss": 1.793,
"nll_loss": 1.719857931137085,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.1524112969636917,
"rewards/margins": -0.003474000608548522,
"rewards/rejected": -0.14893729984760284,
"step": 7
},
{
"epoch": 0.011688697121201744,
"grad_norm": 1.520970106124878,
"learning_rate": 9.9999461653818e-06,
"log_odds_chosen": -0.08909691870212555,
"log_odds_ratio": -0.7692862153053284,
"logits/chosen": -2.243410110473633,
"logits/rejected": -2.2267463207244873,
"logps/chosen": -1.446630597114563,
"logps/rejected": -1.396761417388916,
"loss": 1.759,
"nll_loss": 1.682105541229248,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.14466306567192078,
"rewards/margins": -0.00498693622648716,
"rewards/rejected": -0.13967613875865936,
"step": 8
},
{
"epoch": 0.013149784261351962,
"grad_norm": 1.2674661874771118,
"learning_rate": 9.999784662686462e-06,
"log_odds_chosen": 0.03795226663351059,
"log_odds_ratio": -0.7072824835777283,
"logits/chosen": -2.1986892223358154,
"logits/rejected": -2.2072536945343018,
"logps/chosen": -1.4396700859069824,
"logps/rejected": -1.471963882446289,
"loss": 1.7775,
"nll_loss": 1.7067829370498657,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.14396700263023376,
"rewards/margins": 0.0032293866388499737,
"rewards/rejected": -0.14719641208648682,
"step": 9
},
{
"epoch": 0.01461087140150218,
"grad_norm": 0.984953761100769,
"learning_rate": 9.999515495391765e-06,
"log_odds_chosen": -0.1402886062860489,
"log_odds_ratio": -0.7837027907371521,
"logits/chosen": -2.173368453979492,
"logits/rejected": -2.152172803878784,
"logps/chosen": -1.3456827402114868,
"logps/rejected": -1.2599360942840576,
"loss": 1.6237,
"nll_loss": 1.5452890396118164,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.13456827402114868,
"rewards/margins": -0.008574655279517174,
"rewards/rejected": -0.12599360942840576,
"step": 10
},
{
"epoch": 0.016071958541652397,
"grad_norm": 0.9352578520774841,
"learning_rate": 9.999138669293913e-06,
"log_odds_chosen": 0.030955376103520393,
"log_odds_ratio": -0.6955101490020752,
"logits/chosen": -2.242959976196289,
"logits/rejected": -2.2378480434417725,
"logps/chosen": -1.3454885482788086,
"logps/rejected": -1.3571863174438477,
"loss": 1.6984,
"nll_loss": 1.6288717985153198,
"rewards/accuracies": 0.453125,
"rewards/chosen": -0.1345488578081131,
"rewards/margins": 0.0011697756126523018,
"rewards/rejected": -0.13571862876415253,
"step": 11
},
{
"epoch": 0.017533045681802615,
"grad_norm": 0.7604814171791077,
"learning_rate": 9.998654192507421e-06,
"log_odds_chosen": -0.025142917409539223,
"log_odds_ratio": -0.7221760153770447,
"logits/chosen": -2.1557188034057617,
"logits/rejected": -2.181380271911621,
"logps/chosen": -1.3569337129592896,
"logps/rejected": -1.342216968536377,
"loss": 1.6662,
"nll_loss": 1.59402596950531,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.13569337129592896,
"rewards/margins": -0.0014716808218508959,
"rewards/rejected": -0.13422170281410217,
"step": 12
},
{
"epoch": 0.018994132821952833,
"grad_norm": 0.7423222064971924,
"learning_rate": 9.998062075464943e-06,
"log_odds_chosen": -0.11804741621017456,
"log_odds_ratio": -0.7873090505599976,
"logits/chosen": -2.200981616973877,
"logits/rejected": -2.1934943199157715,
"logps/chosen": -1.2868025302886963,
"logps/rejected": -1.2002918720245361,
"loss": 1.6073,
"nll_loss": 1.5285530090332031,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.1286802589893341,
"rewards/margins": -0.008651047013700008,
"rewards/rejected": -0.12002921104431152,
"step": 13
},
{
"epoch": 0.020455219962103054,
"grad_norm": 0.6683437824249268,
"learning_rate": 9.997362330917032e-06,
"log_odds_chosen": -0.012934267520904541,
"log_odds_ratio": -0.7275698184967041,
"logits/chosen": -2.1948914527893066,
"logits/rejected": -2.200413227081299,
"logps/chosen": -1.3400592803955078,
"logps/rejected": -1.3343003988265991,
"loss": 1.6521,
"nll_loss": 1.5793243646621704,
"rewards/accuracies": 0.453125,
"rewards/chosen": -0.13400591909885406,
"rewards/margins": -0.0005758859915658832,
"rewards/rejected": -0.13343004882335663,
"step": 14
},
{
"epoch": 0.02191630710225327,
"grad_norm": 0.6963202357292175,
"learning_rate": 9.996554973931884e-06,
"log_odds_chosen": -0.11072862148284912,
"log_odds_ratio": -0.7919189929962158,
"logits/chosen": -2.1773252487182617,
"logits/rejected": -2.162838935852051,
"logps/chosen": -1.305843710899353,
"logps/rejected": -1.2427345514297485,
"loss": 1.6167,
"nll_loss": 1.5374785661697388,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.13058437407016754,
"rewards/margins": -0.006310915574431419,
"rewards/rejected": -0.12427344918251038,
"step": 15
},
{
"epoch": 0.02337739424240349,
"grad_norm": 0.6812885999679565,
"learning_rate": 9.995640021894996e-06,
"log_odds_chosen": -0.03462236002087593,
"log_odds_ratio": -0.7293962836265564,
"logits/chosen": -2.1697518825531006,
"logits/rejected": -2.1682655811309814,
"logps/chosen": -1.2563740015029907,
"logps/rejected": -1.2293565273284912,
"loss": 1.5666,
"nll_loss": 1.4936596155166626,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.12563739717006683,
"rewards/margins": -0.0027017316315323114,
"rewards/rejected": -0.12293566763401031,
"step": 16
},
{
"epoch": 0.024838481382553707,
"grad_norm": 0.6587896943092346,
"learning_rate": 9.994617494508811e-06,
"log_odds_chosen": -0.010003458708524704,
"log_odds_ratio": -0.7430539727210999,
"logits/chosen": -2.200150966644287,
"logits/rejected": -2.1906399726867676,
"logps/chosen": -1.2153267860412598,
"logps/rejected": -1.1962745189666748,
"loss": 1.5641,
"nll_loss": 1.4898183345794678,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.12153266370296478,
"rewards/margins": -0.001905218348838389,
"rewards/rejected": -0.119627445936203,
"step": 17
},
{
"epoch": 0.026299568522703924,
"grad_norm": 0.5830309987068176,
"learning_rate": 9.993487413792276e-06,
"log_odds_chosen": -0.1340780407190323,
"log_odds_ratio": -0.7850849628448486,
"logits/chosen": -2.1663918495178223,
"logits/rejected": -2.145009756088257,
"logps/chosen": -1.2580608129501343,
"logps/rejected": -1.188957929611206,
"loss": 1.5591,
"nll_loss": 1.4806358814239502,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.1258060783147812,
"rewards/margins": -0.006910297088325024,
"rewards/rejected": -0.11889579892158508,
"step": 18
},
{
"epoch": 0.027760655662854142,
"grad_norm": 0.5536736249923706,
"learning_rate": 9.992249804080372e-06,
"log_odds_chosen": -0.021063022315502167,
"log_odds_ratio": -0.7351462244987488,
"logits/chosen": -2.1682121753692627,
"logits/rejected": -2.163313150405884,
"logps/chosen": -1.2965900897979736,
"logps/rejected": -1.2690778970718384,
"loss": 1.6294,
"nll_loss": 1.5559338331222534,
"rewards/accuracies": 0.421875,
"rewards/chosen": -0.1296590119600296,
"rewards/margins": -0.0027512230444699526,
"rewards/rejected": -0.12690778076648712,
"step": 19
},
{
"epoch": 0.02922174280300436,
"grad_norm": 0.5634390711784363,
"learning_rate": 9.990904692023604e-06,
"log_odds_chosen": 0.025263303890824318,
"log_odds_ratio": -0.7174615263938904,
"logits/chosen": -2.26141357421875,
"logits/rejected": -2.250270366668701,
"logps/chosen": -1.3806607723236084,
"logps/rejected": -1.3875806331634521,
"loss": 1.7203,
"nll_loss": 1.6485247611999512,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.13806606829166412,
"rewards/margins": 0.0006919947918504477,
"rewards/rejected": -0.1387580782175064,
"step": 20
},
{
"epoch": 0.030682829943154577,
"grad_norm": 0.5434465408325195,
"learning_rate": 9.989452106587406e-06,
"log_odds_chosen": 0.012862562201917171,
"log_odds_ratio": -0.7206666469573975,
"logits/chosen": -2.1785006523132324,
"logits/rejected": -2.181265354156494,
"logps/chosen": -1.2523807287216187,
"logps/rejected": -1.2531023025512695,
"loss": 1.5781,
"nll_loss": 1.5060465335845947,
"rewards/accuracies": 0.421875,
"rewards/chosen": -0.1252380609512329,
"rewards/margins": 7.216550875455141e-05,
"rewards/rejected": -0.1253102421760559,
"step": 21
},
{
"epoch": 0.032143917083304795,
"grad_norm": 0.6211642026901245,
"learning_rate": 9.987892079051531e-06,
"log_odds_chosen": 0.012624351307749748,
"log_odds_ratio": -0.7291357517242432,
"logits/chosen": -2.179482936859131,
"logits/rejected": -2.176547050476074,
"logps/chosen": -1.3581693172454834,
"logps/rejected": -1.3609216213226318,
"loss": 1.6565,
"nll_loss": 1.5835901498794556,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.13581693172454834,
"rewards/margins": 0.0002752433065325022,
"rewards/rejected": -0.1360921710729599,
"step": 22
},
{
"epoch": 0.03360500422345501,
"grad_norm": 0.769701361656189,
"learning_rate": 9.986224643009375e-06,
"log_odds_chosen": 0.03776140883564949,
"log_odds_ratio": -0.6954489946365356,
"logits/chosen": -2.1131434440612793,
"logits/rejected": -2.1170055866241455,
"logps/chosen": -1.2192529439926147,
"logps/rejected": -1.2428308725357056,
"loss": 1.519,
"nll_loss": 1.4494192600250244,
"rewards/accuracies": 0.453125,
"rewards/chosen": -0.12192529439926147,
"rewards/margins": 0.0023577904794365168,
"rewards/rejected": -0.1242830902338028,
"step": 23
},
{
"epoch": 0.03506609136360523,
"grad_norm": 0.6579698324203491,
"learning_rate": 9.984449834367251e-06,
"log_odds_chosen": -0.08287765085697174,
"log_odds_ratio": -0.7655948400497437,
"logits/chosen": -2.117600440979004,
"logits/rejected": -2.118288993835449,
"logps/chosen": -1.3066436052322388,
"logps/rejected": -1.2814935445785522,
"loss": 1.5987,
"nll_loss": 1.5221776962280273,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.13066436350345612,
"rewards/margins": -0.0025150016881525517,
"rewards/rejected": -0.1281493604183197,
"step": 24
},
{
"epoch": 0.03652717850375545,
"grad_norm": 0.5370662808418274,
"learning_rate": 9.982567691343617e-06,
"log_odds_chosen": -0.041301436722278595,
"log_odds_ratio": -0.7494814395904541,
"logits/chosen": -2.1119844913482666,
"logits/rejected": -2.1228156089782715,
"logps/chosen": -1.2678842544555664,
"logps/rejected": -1.2463194131851196,
"loss": 1.5457,
"nll_loss": 1.4707541465759277,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.1267884224653244,
"rewards/margins": -0.0021564930211752653,
"rewards/rejected": -0.12463192641735077,
"step": 25
},
{
"epoch": 0.037988265643905665,
"grad_norm": 0.5143423080444336,
"learning_rate": 9.980578254468252e-06,
"log_odds_chosen": -0.05193250998854637,
"log_odds_ratio": -0.7482390403747559,
"logits/chosen": -2.1540181636810303,
"logits/rejected": -2.136885166168213,
"logps/chosen": -1.3168916702270508,
"logps/rejected": -1.2728286981582642,
"loss": 1.6123,
"nll_loss": 1.5374691486358643,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.1316891610622406,
"rewards/margins": -0.004406292457133532,
"rewards/rejected": -0.12728287279605865,
"step": 26
},
{
"epoch": 0.03944935278405589,
"grad_norm": 0.5746509432792664,
"learning_rate": 9.978481566581388e-06,
"log_odds_chosen": -0.09941092133522034,
"log_odds_ratio": -0.7670192122459412,
"logits/chosen": -2.0658328533172607,
"logits/rejected": -2.053611993789673,
"logps/chosen": -1.3229994773864746,
"logps/rejected": -1.2689064741134644,
"loss": 1.5754,
"nll_loss": 1.4986768960952759,
"rewards/accuracies": 0.265625,
"rewards/chosen": -0.13229995965957642,
"rewards/margins": -0.005409288220107555,
"rewards/rejected": -0.1268906593322754,
"step": 27
},
{
"epoch": 0.04091043992420611,
"grad_norm": 0.5285203456878662,
"learning_rate": 9.976277672832782e-06,
"log_odds_chosen": -0.009130319580435753,
"log_odds_ratio": -0.7242738008499146,
"logits/chosen": -2.119077682495117,
"logits/rejected": -2.109398365020752,
"logps/chosen": -1.311572790145874,
"logps/rejected": -1.3026206493377686,
"loss": 1.596,
"nll_loss": 1.5235313177108765,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.1311572790145874,
"rewards/margins": -0.0008952060015872121,
"rewards/rejected": -0.1302620768547058,
"step": 28
},
{
"epoch": 0.042371527064356325,
"grad_norm": 0.5054935812950134,
"learning_rate": 9.973966620680741e-06,
"log_odds_chosen": 9.676720947027206e-05,
"log_odds_ratio": -0.7112984657287598,
"logits/chosen": -2.0804967880249023,
"logits/rejected": -2.0964128971099854,
"logps/chosen": -1.3590530157089233,
"logps/rejected": -1.3668309450149536,
"loss": 1.6081,
"nll_loss": 1.5369728803634644,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.13590531051158905,
"rewards/margins": 0.0007777921855449677,
"rewards/rejected": -0.13668310642242432,
"step": 29
},
{
"epoch": 0.04383261420450654,
"grad_norm": 0.5947529077529907,
"learning_rate": 9.971548459891113e-06,
"log_odds_chosen": -0.005686625838279724,
"log_odds_ratio": -0.7236043810844421,
"logits/chosen": -2.040478229522705,
"logits/rejected": -2.050001859664917,
"logps/chosen": -1.3079044818878174,
"logps/rejected": -1.3112431764602661,
"loss": 1.6105,
"nll_loss": 1.5381548404693604,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.13079045712947845,
"rewards/margins": 0.0003338647074997425,
"rewards/rejected": -0.1311243176460266,
"step": 30
},
{
"epoch": 0.04529370134465676,
"grad_norm": 0.541822075843811,
"learning_rate": 9.969023242536206e-06,
"log_odds_chosen": -0.03166639059782028,
"log_odds_ratio": -0.7292428612709045,
"logits/chosen": -2.040898323059082,
"logits/rejected": -2.0398669242858887,
"logps/chosen": -1.2686175107955933,
"logps/rejected": -1.261348843574524,
"loss": 1.541,
"nll_loss": 1.4681065082550049,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.12686176598072052,
"rewards/margins": -0.0007268765475600958,
"rewards/rejected": -0.12613488733768463,
"step": 31
},
{
"epoch": 0.04675478848480698,
"grad_norm": 0.6896752715110779,
"learning_rate": 9.966391022993658e-06,
"log_odds_chosen": 0.13857628405094147,
"log_odds_ratio": -0.6746503114700317,
"logits/chosen": -2.005702495574951,
"logits/rejected": -2.009239912033081,
"logps/chosen": -1.14279043674469,
"logps/rejected": -1.2037732601165771,
"loss": 1.475,
"nll_loss": 1.4074950218200684,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.11427903920412064,
"rewards/margins": 0.00609829043969512,
"rewards/rejected": -0.12037733197212219,
"step": 32
},
{
"epoch": 0.048215875624957195,
"grad_norm": 0.4958839416503906,
"learning_rate": 9.963651857945286e-06,
"log_odds_chosen": -0.07793000340461731,
"log_odds_ratio": -0.7607824206352234,
"logits/chosen": -2.067959785461426,
"logits/rejected": -2.06540584564209,
"logps/chosen": -1.3159579038619995,
"logps/rejected": -1.2743453979492188,
"loss": 1.657,
"nll_loss": 1.5809520483016968,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.13159577548503876,
"rewards/margins": -0.004161248914897442,
"rewards/rejected": -0.12743453681468964,
"step": 33
},
{
"epoch": 0.04967696276510741,
"grad_norm": 0.530023992061615,
"learning_rate": 9.960805806375854e-06,
"log_odds_chosen": 0.017400667071342468,
"log_odds_ratio": -0.7287170886993408,
"logits/chosen": -2.049931526184082,
"logits/rejected": -2.067579984664917,
"logps/chosen": -1.313307285308838,
"logps/rejected": -1.3060802221298218,
"loss": 1.5791,
"nll_loss": 1.506237268447876,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.1313307285308838,
"rewards/margins": -0.0007226967136375606,
"rewards/rejected": -0.13060802221298218,
"step": 34
},
{
"epoch": 0.05113804990525763,
"grad_norm": 0.5267317891120911,
"learning_rate": 9.957852929571799e-06,
"log_odds_chosen": -0.08755096048116684,
"log_odds_ratio": -0.7537316679954529,
"logits/chosen": -2.0170559883117676,
"logits/rejected": -2.0241918563842773,
"logps/chosen": -1.3282378911972046,
"logps/rejected": -1.267946481704712,
"loss": 1.5959,
"nll_loss": 1.5205148458480835,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.13282378017902374,
"rewards/margins": -0.006029147654771805,
"rewards/rejected": -0.12679465115070343,
"step": 35
},
{
"epoch": 0.05259913704540785,
"grad_norm": 0.5598548054695129,
"learning_rate": 9.954793291119917e-06,
"log_odds_chosen": -0.07335270196199417,
"log_odds_ratio": -0.7724658846855164,
"logits/chosen": -2.0005574226379395,
"logits/rejected": -1.985095739364624,
"logps/chosen": -1.2228074073791504,
"logps/rejected": -1.1555684804916382,
"loss": 1.5164,
"nll_loss": 1.4391915798187256,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.12228074669837952,
"rewards/margins": -0.006723896134644747,
"rewards/rejected": -0.11555685847997665,
"step": 36
},
{
"epoch": 0.054060224185558066,
"grad_norm": 0.5158045887947083,
"learning_rate": 9.951626956906001e-06,
"log_odds_chosen": -0.1070481538772583,
"log_odds_ratio": -0.767937421798706,
"logits/chosen": -2.022592067718506,
"logits/rejected": -2.0269908905029297,
"logps/chosen": -1.3267402648925781,
"logps/rejected": -1.2489224672317505,
"loss": 1.5828,
"nll_loss": 1.5060540437698364,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.13267403841018677,
"rewards/margins": -0.0077817970886826515,
"rewards/rejected": -0.1248922348022461,
"step": 37
},
{
"epoch": 0.055521311325708284,
"grad_norm": 0.5369778275489807,
"learning_rate": 9.948353995113405e-06,
"log_odds_chosen": 0.02357397973537445,
"log_odds_ratio": -0.7277624011039734,
"logits/chosen": -1.971970796585083,
"logits/rejected": -2.0005533695220947,
"logps/chosen": -1.190409541130066,
"logps/rejected": -1.1846762895584106,
"loss": 1.4772,
"nll_loss": 1.404414176940918,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.11904095858335495,
"rewards/margins": -0.0005733318976126611,
"rewards/rejected": -0.11846762895584106,
"step": 38
},
{
"epoch": 0.0569823984658585,
"grad_norm": 0.5577079057693481,
"learning_rate": 9.944974476221587e-06,
"log_odds_chosen": 0.05960956960916519,
"log_odds_ratio": -0.694072425365448,
"logits/chosen": -2.0205910205841064,
"logits/rejected": -2.010025978088379,
"logps/chosen": -1.2545329332351685,
"logps/rejected": -1.2769423723220825,
"loss": 1.5708,
"nll_loss": 1.5014022588729858,
"rewards/accuracies": 0.46875,
"rewards/chosen": -0.12545329332351685,
"rewards/margins": 0.002240956760942936,
"rewards/rejected": -0.1276942491531372,
"step": 39
},
{
"epoch": 0.05844348560600872,
"grad_norm": 0.5200385451316833,
"learning_rate": 9.941488473004594e-06,
"log_odds_chosen": -0.016573341563344002,
"log_odds_ratio": -0.7171596884727478,
"logits/chosen": -2.0057106018066406,
"logits/rejected": -2.004528760910034,
"logps/chosen": -1.4052870273590088,
"logps/rejected": -1.3805549144744873,
"loss": 1.6515,
"nll_loss": 1.5797675848007202,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.14052869379520416,
"rewards/margins": -0.0024731969460844994,
"rewards/rejected": -0.13805550336837769,
"step": 40
},
{
"epoch": 0.059904572746158936,
"grad_norm": 0.5273575186729431,
"learning_rate": 9.937896060529485e-06,
"log_odds_chosen": -0.08113294094800949,
"log_odds_ratio": -0.7743253707885742,
"logits/chosen": -1.9785962104797363,
"logits/rejected": -1.9705551862716675,
"logps/chosen": -1.2821004390716553,
"logps/rejected": -1.208033561706543,
"loss": 1.5836,
"nll_loss": 1.5061376094818115,
"rewards/accuracies": 0.234375,
"rewards/chosen": -0.12821003794670105,
"rewards/margins": -0.007406666409224272,
"rewards/rejected": -0.12080337107181549,
"step": 41
},
{
"epoch": 0.061365659886309154,
"grad_norm": 0.5236508250236511,
"learning_rate": 9.934197316154721e-06,
"log_odds_chosen": -0.13003796339035034,
"log_odds_ratio": -0.7939882278442383,
"logits/chosen": -1.9101815223693848,
"logits/rejected": -1.943664312362671,
"logps/chosen": -1.155348777770996,
"logps/rejected": -1.0918152332305908,
"loss": 1.4569,
"nll_loss": 1.377458095550537,
"rewards/accuracies": 0.328125,
"rewards/chosen": -0.11553487926721573,
"rewards/margins": -0.006353363860398531,
"rewards/rejected": -0.10918151587247849,
"step": 42
},
{
"epoch": 0.06282674702645938,
"grad_norm": 0.4699207842350006,
"learning_rate": 9.9303923195285e-06,
"log_odds_chosen": -0.041184213012456894,
"log_odds_ratio": -0.7406144142150879,
"logits/chosen": -1.9506869316101074,
"logits/rejected": -1.9735560417175293,
"logps/chosen": -1.2942121028900146,
"logps/rejected": -1.2584986686706543,
"loss": 1.5464,
"nll_loss": 1.4723409414291382,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.12942121922969818,
"rewards/margins": -0.0035713440738618374,
"rewards/rejected": -0.1258498728275299,
"step": 43
},
{
"epoch": 0.06428783416660959,
"grad_norm": 0.5235589742660522,
"learning_rate": 9.92648115258704e-06,
"log_odds_chosen": 0.04650488868355751,
"log_odds_ratio": -0.7010443806648254,
"logits/chosen": -1.9501118659973145,
"logits/rejected": -1.9561800956726074,
"logps/chosen": -1.1499981880187988,
"logps/rejected": -1.1835464239120483,
"loss": 1.4224,
"nll_loss": 1.3523142337799072,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.11499983072280884,
"rewards/margins": 0.003354821354150772,
"rewards/rejected": -0.11835464835166931,
"step": 44
}
],
"logging_steps": 1.0,
"max_steps": 684,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 4,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}