|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.06428783416660959, |
|
"eval_steps": 500, |
|
"global_step": 44, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001461087140150218, |
|
"grad_norm": 2.4780876636505127, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"log_odds_chosen": 0.016244888305664062, |
|
"log_odds_ratio": -0.6993749141693115, |
|
"logits/chosen": -2.2119967937469482, |
|
"logits/rejected": -2.212354898452759, |
|
"logps/chosen": -1.583775520324707, |
|
"logps/rejected": -1.594750165939331, |
|
"loss": 1.8606, |
|
"nll_loss": 1.7906217575073242, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.1583775281906128, |
|
"rewards/margins": 0.0010974762262776494, |
|
"rewards/rejected": -0.15947501361370087, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002922174280300436, |
|
"grad_norm": 2.627044200897217, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"log_odds_chosen": 0.11979679018259048, |
|
"log_odds_ratio": -0.6573244333267212, |
|
"logits/chosen": -2.2274065017700195, |
|
"logits/rejected": -2.243682622909546, |
|
"logps/chosen": -1.5664572715759277, |
|
"logps/rejected": -1.6658927202224731, |
|
"loss": 1.8544, |
|
"nll_loss": 1.7886956930160522, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.156645730137825, |
|
"rewards/margins": 0.009943531826138496, |
|
"rewards/rejected": -0.16658926010131836, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.004383261420450654, |
|
"grad_norm": 2.113027334213257, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"log_odds_chosen": 0.058733537793159485, |
|
"log_odds_ratio": -0.6822808980941772, |
|
"logits/chosen": -2.1965179443359375, |
|
"logits/rejected": -2.195549964904785, |
|
"logps/chosen": -1.4528911113739014, |
|
"logps/rejected": -1.498779296875, |
|
"loss": 1.7314, |
|
"nll_loss": 1.6631801128387451, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1452891230583191, |
|
"rewards/margins": 0.004588826093822718, |
|
"rewards/rejected": -0.14987793564796448, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.005844348560600872, |
|
"grad_norm": 1.8004069328308105, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"log_odds_chosen": 0.09058406949043274, |
|
"log_odds_ratio": -0.6622194051742554, |
|
"logits/chosen": -2.240548849105835, |
|
"logits/rejected": -2.276327133178711, |
|
"logps/chosen": -1.469621181488037, |
|
"logps/rejected": -1.5354365110397339, |
|
"loss": 1.8061, |
|
"nll_loss": 1.7399120330810547, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.14696213603019714, |
|
"rewards/margins": 0.006581515539437532, |
|
"rewards/rejected": -0.1535436362028122, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00730543570075109, |
|
"grad_norm": 1.565532922744751, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"log_odds_chosen": 0.052982207387685776, |
|
"log_odds_ratio": -0.6897823214530945, |
|
"logits/chosen": -2.2151541709899902, |
|
"logits/rejected": -2.215179681777954, |
|
"logps/chosen": -1.5283693075180054, |
|
"logps/rejected": -1.5738036632537842, |
|
"loss": 1.8656, |
|
"nll_loss": 1.7966063022613525, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.15283691883087158, |
|
"rewards/margins": 0.004543437156826258, |
|
"rewards/rejected": -0.1573803573846817, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.008766522840901307, |
|
"grad_norm": 1.689719557762146, |
|
"learning_rate": 8.571428571428571e-06, |
|
"log_odds_chosen": -0.021141668781638145, |
|
"log_odds_ratio": -0.7178523540496826, |
|
"logits/chosen": -2.180445432662964, |
|
"logits/rejected": -2.216461658477783, |
|
"logps/chosen": -1.423187255859375, |
|
"logps/rejected": -1.4068344831466675, |
|
"loss": 1.7435, |
|
"nll_loss": 1.671682357788086, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.1423187255859375, |
|
"rewards/margins": -0.0016352771781384945, |
|
"rewards/rejected": -0.14068344235420227, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.010227609981051527, |
|
"grad_norm": 1.432656168937683, |
|
"learning_rate": 1e-05, |
|
"log_odds_chosen": -0.045114632695913315, |
|
"log_odds_ratio": -0.7312092781066895, |
|
"logits/chosen": -2.2011773586273193, |
|
"logits/rejected": -2.201080799102783, |
|
"logps/chosen": -1.5241130590438843, |
|
"logps/rejected": -1.4893730878829956, |
|
"loss": 1.793, |
|
"nll_loss": 1.719857931137085, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1524112969636917, |
|
"rewards/margins": -0.003474000608548522, |
|
"rewards/rejected": -0.14893729984760284, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.011688697121201744, |
|
"grad_norm": 1.520970106124878, |
|
"learning_rate": 9.9999461653818e-06, |
|
"log_odds_chosen": -0.08909691870212555, |
|
"log_odds_ratio": -0.7692862153053284, |
|
"logits/chosen": -2.243410110473633, |
|
"logits/rejected": -2.2267463207244873, |
|
"logps/chosen": -1.446630597114563, |
|
"logps/rejected": -1.396761417388916, |
|
"loss": 1.759, |
|
"nll_loss": 1.682105541229248, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -0.14466306567192078, |
|
"rewards/margins": -0.00498693622648716, |
|
"rewards/rejected": -0.13967613875865936, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.013149784261351962, |
|
"grad_norm": 1.2674661874771118, |
|
"learning_rate": 9.999784662686462e-06, |
|
"log_odds_chosen": 0.03795226663351059, |
|
"log_odds_ratio": -0.7072824835777283, |
|
"logits/chosen": -2.1986892223358154, |
|
"logits/rejected": -2.2072536945343018, |
|
"logps/chosen": -1.4396700859069824, |
|
"logps/rejected": -1.471963882446289, |
|
"loss": 1.7775, |
|
"nll_loss": 1.7067829370498657, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.14396700263023376, |
|
"rewards/margins": 0.0032293866388499737, |
|
"rewards/rejected": -0.14719641208648682, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01461087140150218, |
|
"grad_norm": 0.984953761100769, |
|
"learning_rate": 9.999515495391765e-06, |
|
"log_odds_chosen": -0.1402886062860489, |
|
"log_odds_ratio": -0.7837027907371521, |
|
"logits/chosen": -2.173368453979492, |
|
"logits/rejected": -2.152172803878784, |
|
"logps/chosen": -1.3456827402114868, |
|
"logps/rejected": -1.2599360942840576, |
|
"loss": 1.6237, |
|
"nll_loss": 1.5452890396118164, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13456827402114868, |
|
"rewards/margins": -0.008574655279517174, |
|
"rewards/rejected": -0.12599360942840576, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.016071958541652397, |
|
"grad_norm": 0.9352578520774841, |
|
"learning_rate": 9.999138669293913e-06, |
|
"log_odds_chosen": 0.030955376103520393, |
|
"log_odds_ratio": -0.6955101490020752, |
|
"logits/chosen": -2.242959976196289, |
|
"logits/rejected": -2.2378480434417725, |
|
"logps/chosen": -1.3454885482788086, |
|
"logps/rejected": -1.3571863174438477, |
|
"loss": 1.6984, |
|
"nll_loss": 1.6288717985153198, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.1345488578081131, |
|
"rewards/margins": 0.0011697756126523018, |
|
"rewards/rejected": -0.13571862876415253, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.017533045681802615, |
|
"grad_norm": 0.7604814171791077, |
|
"learning_rate": 9.998654192507421e-06, |
|
"log_odds_chosen": -0.025142917409539223, |
|
"log_odds_ratio": -0.7221760153770447, |
|
"logits/chosen": -2.1557188034057617, |
|
"logits/rejected": -2.181380271911621, |
|
"logps/chosen": -1.3569337129592896, |
|
"logps/rejected": -1.342216968536377, |
|
"loss": 1.6662, |
|
"nll_loss": 1.59402596950531, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.13569337129592896, |
|
"rewards/margins": -0.0014716808218508959, |
|
"rewards/rejected": -0.13422170281410217, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.018994132821952833, |
|
"grad_norm": 0.7423222064971924, |
|
"learning_rate": 9.998062075464943e-06, |
|
"log_odds_chosen": -0.11804741621017456, |
|
"log_odds_ratio": -0.7873090505599976, |
|
"logits/chosen": -2.200981616973877, |
|
"logits/rejected": -2.1934943199157715, |
|
"logps/chosen": -1.2868025302886963, |
|
"logps/rejected": -1.2002918720245361, |
|
"loss": 1.6073, |
|
"nll_loss": 1.5285530090332031, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.1286802589893341, |
|
"rewards/margins": -0.008651047013700008, |
|
"rewards/rejected": -0.12002921104431152, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.020455219962103054, |
|
"grad_norm": 0.6683437824249268, |
|
"learning_rate": 9.997362330917032e-06, |
|
"log_odds_chosen": -0.012934267520904541, |
|
"log_odds_ratio": -0.7275698184967041, |
|
"logits/chosen": -2.1948914527893066, |
|
"logits/rejected": -2.200413227081299, |
|
"logps/chosen": -1.3400592803955078, |
|
"logps/rejected": -1.3343003988265991, |
|
"loss": 1.6521, |
|
"nll_loss": 1.5793243646621704, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.13400591909885406, |
|
"rewards/margins": -0.0005758859915658832, |
|
"rewards/rejected": -0.13343004882335663, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02191630710225327, |
|
"grad_norm": 0.6963202357292175, |
|
"learning_rate": 9.996554973931884e-06, |
|
"log_odds_chosen": -0.11072862148284912, |
|
"log_odds_ratio": -0.7919189929962158, |
|
"logits/chosen": -2.1773252487182617, |
|
"logits/rejected": -2.162838935852051, |
|
"logps/chosen": -1.305843710899353, |
|
"logps/rejected": -1.2427345514297485, |
|
"loss": 1.6167, |
|
"nll_loss": 1.5374785661697388, |
|
"rewards/accuracies": 0.359375, |
|
"rewards/chosen": -0.13058437407016754, |
|
"rewards/margins": -0.006310915574431419, |
|
"rewards/rejected": -0.12427344918251038, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02337739424240349, |
|
"grad_norm": 0.6812885999679565, |
|
"learning_rate": 9.995640021894996e-06, |
|
"log_odds_chosen": -0.03462236002087593, |
|
"log_odds_ratio": -0.7293962836265564, |
|
"logits/chosen": -2.1697518825531006, |
|
"logits/rejected": -2.1682655811309814, |
|
"logps/chosen": -1.2563740015029907, |
|
"logps/rejected": -1.2293565273284912, |
|
"loss": 1.5666, |
|
"nll_loss": 1.4936596155166626, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.12563739717006683, |
|
"rewards/margins": -0.0027017316315323114, |
|
"rewards/rejected": -0.12293566763401031, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.024838481382553707, |
|
"grad_norm": 0.6587896943092346, |
|
"learning_rate": 9.994617494508811e-06, |
|
"log_odds_chosen": -0.010003458708524704, |
|
"log_odds_ratio": -0.7430539727210999, |
|
"logits/chosen": -2.200150966644287, |
|
"logits/rejected": -2.1906399726867676, |
|
"logps/chosen": -1.2153267860412598, |
|
"logps/rejected": -1.1962745189666748, |
|
"loss": 1.5641, |
|
"nll_loss": 1.4898183345794678, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -0.12153266370296478, |
|
"rewards/margins": -0.001905218348838389, |
|
"rewards/rejected": -0.119627445936203, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.026299568522703924, |
|
"grad_norm": 0.5830309987068176, |
|
"learning_rate": 9.993487413792276e-06, |
|
"log_odds_chosen": -0.1340780407190323, |
|
"log_odds_ratio": -0.7850849628448486, |
|
"logits/chosen": -2.1663918495178223, |
|
"logits/rejected": -2.145009756088257, |
|
"logps/chosen": -1.2580608129501343, |
|
"logps/rejected": -1.188957929611206, |
|
"loss": 1.5591, |
|
"nll_loss": 1.4806358814239502, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.1258060783147812, |
|
"rewards/margins": -0.006910297088325024, |
|
"rewards/rejected": -0.11889579892158508, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.027760655662854142, |
|
"grad_norm": 0.5536736249923706, |
|
"learning_rate": 9.992249804080372e-06, |
|
"log_odds_chosen": -0.021063022315502167, |
|
"log_odds_ratio": -0.7351462244987488, |
|
"logits/chosen": -2.1682121753692627, |
|
"logits/rejected": -2.163313150405884, |
|
"logps/chosen": -1.2965900897979736, |
|
"logps/rejected": -1.2690778970718384, |
|
"loss": 1.6294, |
|
"nll_loss": 1.5559338331222534, |
|
"rewards/accuracies": 0.421875, |
|
"rewards/chosen": -0.1296590119600296, |
|
"rewards/margins": -0.0027512230444699526, |
|
"rewards/rejected": -0.12690778076648712, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.02922174280300436, |
|
"grad_norm": 0.5634390711784363, |
|
"learning_rate": 9.990904692023604e-06, |
|
"log_odds_chosen": 0.025263303890824318, |
|
"log_odds_ratio": -0.7174615263938904, |
|
"logits/chosen": -2.26141357421875, |
|
"logits/rejected": -2.250270366668701, |
|
"logps/chosen": -1.3806607723236084, |
|
"logps/rejected": -1.3875806331634521, |
|
"loss": 1.7203, |
|
"nll_loss": 1.6485247611999512, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13806606829166412, |
|
"rewards/margins": 0.0006919947918504477, |
|
"rewards/rejected": -0.1387580782175064, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.030682829943154577, |
|
"grad_norm": 0.5434465408325195, |
|
"learning_rate": 9.989452106587406e-06, |
|
"log_odds_chosen": 0.012862562201917171, |
|
"log_odds_ratio": -0.7206666469573975, |
|
"logits/chosen": -2.1785006523132324, |
|
"logits/rejected": -2.181265354156494, |
|
"logps/chosen": -1.2523807287216187, |
|
"logps/rejected": -1.2531023025512695, |
|
"loss": 1.5781, |
|
"nll_loss": 1.5060465335845947, |
|
"rewards/accuracies": 0.421875, |
|
"rewards/chosen": -0.1252380609512329, |
|
"rewards/margins": 7.216550875455141e-05, |
|
"rewards/rejected": -0.1253102421760559, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.032143917083304795, |
|
"grad_norm": 0.6211642026901245, |
|
"learning_rate": 9.987892079051531e-06, |
|
"log_odds_chosen": 0.012624351307749748, |
|
"log_odds_ratio": -0.7291357517242432, |
|
"logits/chosen": -2.179482936859131, |
|
"logits/rejected": -2.176547050476074, |
|
"logps/chosen": -1.3581693172454834, |
|
"logps/rejected": -1.3609216213226318, |
|
"loss": 1.6565, |
|
"nll_loss": 1.5835901498794556, |
|
"rewards/accuracies": 0.359375, |
|
"rewards/chosen": -0.13581693172454834, |
|
"rewards/margins": 0.0002752433065325022, |
|
"rewards/rejected": -0.1360921710729599, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03360500422345501, |
|
"grad_norm": 0.769701361656189, |
|
"learning_rate": 9.986224643009375e-06, |
|
"log_odds_chosen": 0.03776140883564949, |
|
"log_odds_ratio": -0.6954489946365356, |
|
"logits/chosen": -2.1131434440612793, |
|
"logits/rejected": -2.1170055866241455, |
|
"logps/chosen": -1.2192529439926147, |
|
"logps/rejected": -1.2428308725357056, |
|
"loss": 1.519, |
|
"nll_loss": 1.4494192600250244, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.12192529439926147, |
|
"rewards/margins": 0.0023577904794365168, |
|
"rewards/rejected": -0.1242830902338028, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03506609136360523, |
|
"grad_norm": 0.6579698324203491, |
|
"learning_rate": 9.984449834367251e-06, |
|
"log_odds_chosen": -0.08287765085697174, |
|
"log_odds_ratio": -0.7655948400497437, |
|
"logits/chosen": -2.117600440979004, |
|
"logits/rejected": -2.118288993835449, |
|
"logps/chosen": -1.3066436052322388, |
|
"logps/rejected": -1.2814935445785522, |
|
"loss": 1.5987, |
|
"nll_loss": 1.5221776962280273, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.13066436350345612, |
|
"rewards/margins": -0.0025150016881525517, |
|
"rewards/rejected": -0.1281493604183197, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03652717850375545, |
|
"grad_norm": 0.5370662808418274, |
|
"learning_rate": 9.982567691343617e-06, |
|
"log_odds_chosen": -0.041301436722278595, |
|
"log_odds_ratio": -0.7494814395904541, |
|
"logits/chosen": -2.1119844913482666, |
|
"logits/rejected": -2.1228156089782715, |
|
"logps/chosen": -1.2678842544555664, |
|
"logps/rejected": -1.2463194131851196, |
|
"loss": 1.5457, |
|
"nll_loss": 1.4707541465759277, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.1267884224653244, |
|
"rewards/margins": -0.0021564930211752653, |
|
"rewards/rejected": -0.12463192641735077, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.037988265643905665, |
|
"grad_norm": 0.5143423080444336, |
|
"learning_rate": 9.980578254468252e-06, |
|
"log_odds_chosen": -0.05193250998854637, |
|
"log_odds_ratio": -0.7482390403747559, |
|
"logits/chosen": -2.1540181636810303, |
|
"logits/rejected": -2.136885166168213, |
|
"logps/chosen": -1.3168916702270508, |
|
"logps/rejected": -1.2728286981582642, |
|
"loss": 1.6123, |
|
"nll_loss": 1.5374691486358643, |
|
"rewards/accuracies": 0.40625, |
|
"rewards/chosen": -0.1316891610622406, |
|
"rewards/margins": -0.004406292457133532, |
|
"rewards/rejected": -0.12728287279605865, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03944935278405589, |
|
"grad_norm": 0.5746509432792664, |
|
"learning_rate": 9.978481566581388e-06, |
|
"log_odds_chosen": -0.09941092133522034, |
|
"log_odds_ratio": -0.7670192122459412, |
|
"logits/chosen": -2.0658328533172607, |
|
"logits/rejected": -2.053611993789673, |
|
"logps/chosen": -1.3229994773864746, |
|
"logps/rejected": -1.2689064741134644, |
|
"loss": 1.5754, |
|
"nll_loss": 1.4986768960952759, |
|
"rewards/accuracies": 0.265625, |
|
"rewards/chosen": -0.13229995965957642, |
|
"rewards/margins": -0.005409288220107555, |
|
"rewards/rejected": -0.1268906593322754, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04091043992420611, |
|
"grad_norm": 0.5285203456878662, |
|
"learning_rate": 9.976277672832782e-06, |
|
"log_odds_chosen": -0.009130319580435753, |
|
"log_odds_ratio": -0.7242738008499146, |
|
"logits/chosen": -2.119077682495117, |
|
"logits/rejected": -2.109398365020752, |
|
"logps/chosen": -1.311572790145874, |
|
"logps/rejected": -1.3026206493377686, |
|
"loss": 1.596, |
|
"nll_loss": 1.5235313177108765, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.1311572790145874, |
|
"rewards/margins": -0.0008952060015872121, |
|
"rewards/rejected": -0.1302620768547058, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.042371527064356325, |
|
"grad_norm": 0.5054935812950134, |
|
"learning_rate": 9.973966620680741e-06, |
|
"log_odds_chosen": 9.676720947027206e-05, |
|
"log_odds_ratio": -0.7112984657287598, |
|
"logits/chosen": -2.0804967880249023, |
|
"logits/rejected": -2.0964128971099854, |
|
"logps/chosen": -1.3590530157089233, |
|
"logps/rejected": -1.3668309450149536, |
|
"loss": 1.6081, |
|
"nll_loss": 1.5369728803634644, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.13590531051158905, |
|
"rewards/margins": 0.0007777921855449677, |
|
"rewards/rejected": -0.13668310642242432, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04383261420450654, |
|
"grad_norm": 0.5947529077529907, |
|
"learning_rate": 9.971548459891113e-06, |
|
"log_odds_chosen": -0.005686625838279724, |
|
"log_odds_ratio": -0.7236043810844421, |
|
"logits/chosen": -2.040478229522705, |
|
"logits/rejected": -2.050001859664917, |
|
"logps/chosen": -1.3079044818878174, |
|
"logps/rejected": -1.3112431764602661, |
|
"loss": 1.6105, |
|
"nll_loss": 1.5381548404693604, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -0.13079045712947845, |
|
"rewards/margins": 0.0003338647074997425, |
|
"rewards/rejected": -0.1311243176460266, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04529370134465676, |
|
"grad_norm": 0.541822075843811, |
|
"learning_rate": 9.969023242536206e-06, |
|
"log_odds_chosen": -0.03166639059782028, |
|
"log_odds_ratio": -0.7292428612709045, |
|
"logits/chosen": -2.040898323059082, |
|
"logits/rejected": -2.0398669242858887, |
|
"logps/chosen": -1.2686175107955933, |
|
"logps/rejected": -1.261348843574524, |
|
"loss": 1.541, |
|
"nll_loss": 1.4681065082550049, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.12686176598072052, |
|
"rewards/margins": -0.0007268765475600958, |
|
"rewards/rejected": -0.12613488733768463, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04675478848480698, |
|
"grad_norm": 0.6896752715110779, |
|
"learning_rate": 9.966391022993658e-06, |
|
"log_odds_chosen": 0.13857628405094147, |
|
"log_odds_ratio": -0.6746503114700317, |
|
"logits/chosen": -2.005702495574951, |
|
"logits/rejected": -2.009239912033081, |
|
"logps/chosen": -1.14279043674469, |
|
"logps/rejected": -1.2037732601165771, |
|
"loss": 1.475, |
|
"nll_loss": 1.4074950218200684, |
|
"rewards/accuracies": 0.484375, |
|
"rewards/chosen": -0.11427903920412064, |
|
"rewards/margins": 0.00609829043969512, |
|
"rewards/rejected": -0.12037733197212219, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.048215875624957195, |
|
"grad_norm": 0.4958839416503906, |
|
"learning_rate": 9.963651857945286e-06, |
|
"log_odds_chosen": -0.07793000340461731, |
|
"log_odds_ratio": -0.7607824206352234, |
|
"logits/chosen": -2.067959785461426, |
|
"logits/rejected": -2.06540584564209, |
|
"logps/chosen": -1.3159579038619995, |
|
"logps/rejected": -1.2743453979492188, |
|
"loss": 1.657, |
|
"nll_loss": 1.5809520483016968, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -0.13159577548503876, |
|
"rewards/margins": -0.004161248914897442, |
|
"rewards/rejected": -0.12743453681468964, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04967696276510741, |
|
"grad_norm": 0.530023992061615, |
|
"learning_rate": 9.960805806375854e-06, |
|
"log_odds_chosen": 0.017400667071342468, |
|
"log_odds_ratio": -0.7287170886993408, |
|
"logits/chosen": -2.049931526184082, |
|
"logits/rejected": -2.067579984664917, |
|
"logps/chosen": -1.313307285308838, |
|
"logps/rejected": -1.3060802221298218, |
|
"loss": 1.5791, |
|
"nll_loss": 1.506237268447876, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -0.1313307285308838, |
|
"rewards/margins": -0.0007226967136375606, |
|
"rewards/rejected": -0.13060802221298218, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05113804990525763, |
|
"grad_norm": 0.5267317891120911, |
|
"learning_rate": 9.957852929571799e-06, |
|
"log_odds_chosen": -0.08755096048116684, |
|
"log_odds_ratio": -0.7537316679954529, |
|
"logits/chosen": -2.0170559883117676, |
|
"logits/rejected": -2.0241918563842773, |
|
"logps/chosen": -1.3282378911972046, |
|
"logps/rejected": -1.267946481704712, |
|
"loss": 1.5959, |
|
"nll_loss": 1.5205148458480835, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.13282378017902374, |
|
"rewards/margins": -0.006029147654771805, |
|
"rewards/rejected": -0.12679465115070343, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05259913704540785, |
|
"grad_norm": 0.5598548054695129, |
|
"learning_rate": 9.954793291119917e-06, |
|
"log_odds_chosen": -0.07335270196199417, |
|
"log_odds_ratio": -0.7724658846855164, |
|
"logits/chosen": -2.0005574226379395, |
|
"logits/rejected": -1.985095739364624, |
|
"logps/chosen": -1.2228074073791504, |
|
"logps/rejected": -1.1555684804916382, |
|
"loss": 1.5164, |
|
"nll_loss": 1.4391915798187256, |
|
"rewards/accuracies": 0.359375, |
|
"rewards/chosen": -0.12228074669837952, |
|
"rewards/margins": -0.006723896134644747, |
|
"rewards/rejected": -0.11555685847997665, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.054060224185558066, |
|
"grad_norm": 0.5158045887947083, |
|
"learning_rate": 9.951626956906001e-06, |
|
"log_odds_chosen": -0.1070481538772583, |
|
"log_odds_ratio": -0.767937421798706, |
|
"logits/chosen": -2.022592067718506, |
|
"logits/rejected": -2.0269908905029297, |
|
"logps/chosen": -1.3267402648925781, |
|
"logps/rejected": -1.2489224672317505, |
|
"loss": 1.5828, |
|
"nll_loss": 1.5060540437698364, |
|
"rewards/accuracies": 0.390625, |
|
"rewards/chosen": -0.13267403841018677, |
|
"rewards/margins": -0.0077817970886826515, |
|
"rewards/rejected": -0.1248922348022461, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.055521311325708284, |
|
"grad_norm": 0.5369778275489807, |
|
"learning_rate": 9.948353995113405e-06, |
|
"log_odds_chosen": 0.02357397973537445, |
|
"log_odds_ratio": -0.7277624011039734, |
|
"logits/chosen": -1.971970796585083, |
|
"logits/rejected": -2.0005533695220947, |
|
"logps/chosen": -1.190409541130066, |
|
"logps/rejected": -1.1846762895584106, |
|
"loss": 1.4772, |
|
"nll_loss": 1.404414176940918, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.11904095858335495, |
|
"rewards/margins": -0.0005733318976126611, |
|
"rewards/rejected": -0.11846762895584106, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0569823984658585, |
|
"grad_norm": 0.5577079057693481, |
|
"learning_rate": 9.944974476221587e-06, |
|
"log_odds_chosen": 0.05960956960916519, |
|
"log_odds_ratio": -0.694072425365448, |
|
"logits/chosen": -2.0205910205841064, |
|
"logits/rejected": -2.010025978088379, |
|
"logps/chosen": -1.2545329332351685, |
|
"logps/rejected": -1.2769423723220825, |
|
"loss": 1.5708, |
|
"nll_loss": 1.5014022588729858, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.12545329332351685, |
|
"rewards/margins": 0.002240956760942936, |
|
"rewards/rejected": -0.1276942491531372, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05844348560600872, |
|
"grad_norm": 0.5200385451316833, |
|
"learning_rate": 9.941488473004594e-06, |
|
"log_odds_chosen": -0.016573341563344002, |
|
"log_odds_ratio": -0.7171596884727478, |
|
"logits/chosen": -2.0057106018066406, |
|
"logits/rejected": -2.004528760910034, |
|
"logps/chosen": -1.4052870273590088, |
|
"logps/rejected": -1.3805549144744873, |
|
"loss": 1.6515, |
|
"nll_loss": 1.5797675848007202, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.14052869379520416, |
|
"rewards/margins": -0.0024731969460844994, |
|
"rewards/rejected": -0.13805550336837769, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.059904572746158936, |
|
"grad_norm": 0.5273575186729431, |
|
"learning_rate": 9.937896060529485e-06, |
|
"log_odds_chosen": -0.08113294094800949, |
|
"log_odds_ratio": -0.7743253707885742, |
|
"logits/chosen": -1.9785962104797363, |
|
"logits/rejected": -1.9705551862716675, |
|
"logps/chosen": -1.2821004390716553, |
|
"logps/rejected": -1.208033561706543, |
|
"loss": 1.5836, |
|
"nll_loss": 1.5061376094818115, |
|
"rewards/accuracies": 0.234375, |
|
"rewards/chosen": -0.12821003794670105, |
|
"rewards/margins": -0.007406666409224272, |
|
"rewards/rejected": -0.12080337107181549, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.061365659886309154, |
|
"grad_norm": 0.5236508250236511, |
|
"learning_rate": 9.934197316154721e-06, |
|
"log_odds_chosen": -0.13003796339035034, |
|
"log_odds_ratio": -0.7939882278442383, |
|
"logits/chosen": -1.9101815223693848, |
|
"logits/rejected": -1.943664312362671, |
|
"logps/chosen": -1.155348777770996, |
|
"logps/rejected": -1.0918152332305908, |
|
"loss": 1.4569, |
|
"nll_loss": 1.377458095550537, |
|
"rewards/accuracies": 0.328125, |
|
"rewards/chosen": -0.11553487926721573, |
|
"rewards/margins": -0.006353363860398531, |
|
"rewards/rejected": -0.10918151587247849, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06282674702645938, |
|
"grad_norm": 0.4699207842350006, |
|
"learning_rate": 9.9303923195285e-06, |
|
"log_odds_chosen": -0.041184213012456894, |
|
"log_odds_ratio": -0.7406144142150879, |
|
"logits/chosen": -1.9506869316101074, |
|
"logits/rejected": -1.9735560417175293, |
|
"logps/chosen": -1.2942121028900146, |
|
"logps/rejected": -1.2584986686706543, |
|
"loss": 1.5464, |
|
"nll_loss": 1.4723409414291382, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.12942121922969818, |
|
"rewards/margins": -0.0035713440738618374, |
|
"rewards/rejected": -0.1258498728275299, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06428783416660959, |
|
"grad_norm": 0.5235589742660522, |
|
"learning_rate": 9.92648115258704e-06, |
|
"log_odds_chosen": 0.04650488868355751, |
|
"log_odds_ratio": -0.7010443806648254, |
|
"logits/chosen": -1.9501118659973145, |
|
"logits/rejected": -1.9561800956726074, |
|
"logps/chosen": -1.1499981880187988, |
|
"logps/rejected": -1.1835464239120483, |
|
"loss": 1.4224, |
|
"nll_loss": 1.3523142337799072, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.11499983072280884, |
|
"rewards/margins": 0.003354821354150772, |
|
"rewards/rejected": -0.11835464835166931, |
|
"step": 44 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 684, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|