|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 400, |
|
"global_step": 938, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010660980810234541, |
|
"grad_norm": 5.5463102558146335, |
|
"learning_rate": 5.3191489361702125e-09, |
|
"logits/chosen": -0.48140522837638855, |
|
"logits/rejected": -0.7986129522323608, |
|
"logps/chosen": -160.70640563964844, |
|
"logps/rejected": -136.7216033935547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005330490405117271, |
|
"grad_norm": 5.944179098618821, |
|
"learning_rate": 2.6595744680851062e-08, |
|
"logits/chosen": -0.5258230566978455, |
|
"logits/rejected": -0.640978991985321, |
|
"logps/chosen": -143.9716033935547, |
|
"logps/rejected": -130.26953125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.0004923552623949945, |
|
"rewards/margins": -0.0009530532988719642, |
|
"rewards/rejected": 0.00046069800737313926, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.010660980810234541, |
|
"grad_norm": 4.877499599443773, |
|
"learning_rate": 5.3191489361702123e-08, |
|
"logits/chosen": -0.45687875151634216, |
|
"logits/rejected": -0.633367121219635, |
|
"logps/chosen": -171.5751190185547, |
|
"logps/rejected": -156.70230102539062, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.38749998807907104, |
|
"rewards/chosen": -0.0014754905132576823, |
|
"rewards/margins": -0.0020646383054554462, |
|
"rewards/rejected": 0.0005891475593671203, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015991471215351813, |
|
"grad_norm": 4.928018921113954, |
|
"learning_rate": 7.978723404255319e-08, |
|
"logits/chosen": -0.5080267190933228, |
|
"logits/rejected": -0.6690904498100281, |
|
"logps/chosen": -168.29055786132812, |
|
"logps/rejected": -155.68568420410156, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00017321776249445975, |
|
"rewards/margins": 0.0004894703743048012, |
|
"rewards/rejected": -0.00031625264091417193, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.021321961620469083, |
|
"grad_norm": 5.502864121859809, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -0.49741801619529724, |
|
"logits/rejected": -0.6529160141944885, |
|
"logps/chosen": -158.0856475830078, |
|
"logps/rejected": -141.2250518798828, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0007460988126695156, |
|
"rewards/margins": 0.0009390910854563117, |
|
"rewards/rejected": -0.00019299241830594838, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.026652452025586353, |
|
"grad_norm": 5.3161185644529905, |
|
"learning_rate": 1.329787234042553e-07, |
|
"logits/chosen": -0.46866098046302795, |
|
"logits/rejected": -0.5745824575424194, |
|
"logps/chosen": -156.7174835205078, |
|
"logps/rejected": -144.2376251220703, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0008935723453760147, |
|
"rewards/margins": -0.0009486509370617568, |
|
"rewards/rejected": 5.507881360244937e-05, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.031982942430703626, |
|
"grad_norm": 5.272912501828491, |
|
"learning_rate": 1.5957446808510638e-07, |
|
"logits/chosen": -0.49024850130081177, |
|
"logits/rejected": -0.6033456921577454, |
|
"logps/chosen": -157.64395141601562, |
|
"logps/rejected": -150.4394073486328, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0018176069715991616, |
|
"rewards/margins": 0.0021634683944284916, |
|
"rewards/rejected": -0.0003458613937254995, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03731343283582089, |
|
"grad_norm": 5.55755087436118, |
|
"learning_rate": 1.8617021276595742e-07, |
|
"logits/chosen": -0.5024099349975586, |
|
"logits/rejected": -0.5742695927619934, |
|
"logps/chosen": -162.9497528076172, |
|
"logps/rejected": -156.5416259765625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0004840154724661261, |
|
"rewards/margins": 4.346743298810907e-05, |
|
"rewards/rejected": 0.00044054799946025014, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.042643923240938165, |
|
"grad_norm": 5.050573104575282, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -0.4654630124568939, |
|
"logits/rejected": -0.5773854851722717, |
|
"logps/chosen": -153.1385955810547, |
|
"logps/rejected": -147.40850830078125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00016260957636404783, |
|
"rewards/margins": 0.0010524257086217403, |
|
"rewards/rejected": -0.0008898162050172687, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04797441364605544, |
|
"grad_norm": 5.699569850833138, |
|
"learning_rate": 2.393617021276596e-07, |
|
"logits/chosen": -0.4928715229034424, |
|
"logits/rejected": -0.6359135508537292, |
|
"logps/chosen": -155.63232421875, |
|
"logps/rejected": -143.79296875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0014595793327316642, |
|
"rewards/margins": 0.001696806401014328, |
|
"rewards/rejected": -0.00023722714104223996, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.053304904051172705, |
|
"grad_norm": 5.124728866824331, |
|
"learning_rate": 2.659574468085106e-07, |
|
"logits/chosen": -0.43387550115585327, |
|
"logits/rejected": -0.5658468008041382, |
|
"logps/chosen": -175.50062561035156, |
|
"logps/rejected": -154.56787109375, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0016532255103811622, |
|
"rewards/margins": 0.0013198386877775192, |
|
"rewards/rejected": 0.0003333869099151343, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05863539445628998, |
|
"grad_norm": 5.56223084484684, |
|
"learning_rate": 2.925531914893617e-07, |
|
"logits/chosen": -0.5003554224967957, |
|
"logits/rejected": -0.6052166223526001, |
|
"logps/chosen": -151.86026000976562, |
|
"logps/rejected": -144.47586059570312, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0034173422027379274, |
|
"rewards/margins": 0.0018555650021880865, |
|
"rewards/rejected": 0.0015617769677191973, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06396588486140725, |
|
"grad_norm": 5.412628405701925, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.42743635177612305, |
|
"logits/rejected": -0.5739923715591431, |
|
"logps/chosen": -147.76356506347656, |
|
"logps/rejected": -135.29502868652344, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.004091166891157627, |
|
"rewards/margins": 0.0018434191588312387, |
|
"rewards/rejected": 0.0022477474994957447, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06929637526652452, |
|
"grad_norm": 5.442775471516293, |
|
"learning_rate": 3.457446808510638e-07, |
|
"logits/chosen": -0.522619366645813, |
|
"logits/rejected": -0.6582551002502441, |
|
"logps/chosen": -162.0552520751953, |
|
"logps/rejected": -147.86856079101562, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.005479807965457439, |
|
"rewards/margins": 0.004093030467629433, |
|
"rewards/rejected": 0.001386777381412685, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07462686567164178, |
|
"grad_norm": 5.634942637913951, |
|
"learning_rate": 3.7234042553191484e-07, |
|
"logits/chosen": -0.5439807772636414, |
|
"logits/rejected": -0.6844218969345093, |
|
"logps/chosen": -156.98483276367188, |
|
"logps/rejected": -140.76651000976562, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0070900036953389645, |
|
"rewards/margins": 0.00586737459525466, |
|
"rewards/rejected": 0.0012226292164996266, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07995735607675906, |
|
"grad_norm": 4.9084292553173166, |
|
"learning_rate": 3.989361702127659e-07, |
|
"logits/chosen": -0.4480295181274414, |
|
"logits/rejected": -0.5396173596382141, |
|
"logps/chosen": -149.27737426757812, |
|
"logps/rejected": -138.79080200195312, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0054114703088998795, |
|
"rewards/margins": 0.004020148888230324, |
|
"rewards/rejected": 0.0013913216535001993, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 5.243856701834545, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.5261912941932678, |
|
"logits/rejected": -0.6559049487113953, |
|
"logps/chosen": -155.7753143310547, |
|
"logps/rejected": -146.03860473632812, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.007854573428630829, |
|
"rewards/margins": 0.004827320575714111, |
|
"rewards/rejected": 0.0030272528529167175, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0906183368869936, |
|
"grad_norm": 5.506781468585061, |
|
"learning_rate": 4.5212765957446806e-07, |
|
"logits/chosen": -0.5435600876808167, |
|
"logits/rejected": -0.6427361369132996, |
|
"logps/chosen": -136.4017791748047, |
|
"logps/rejected": -130.49105834960938, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.00461820513010025, |
|
"rewards/margins": 0.004016582854092121, |
|
"rewards/rejected": 0.0006016212282702327, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.09594882729211088, |
|
"grad_norm": 5.811357371598748, |
|
"learning_rate": 4.787234042553192e-07, |
|
"logits/chosen": -0.5588937401771545, |
|
"logits/rejected": -0.6763302087783813, |
|
"logps/chosen": -157.134521484375, |
|
"logps/rejected": -146.5879364013672, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.008327952586114407, |
|
"rewards/margins": 0.008614275604486465, |
|
"rewards/rejected": -0.0002863233967218548, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10127931769722814, |
|
"grad_norm": 5.698422927582689, |
|
"learning_rate": 4.999982680938129e-07, |
|
"logits/chosen": -0.5830127596855164, |
|
"logits/rejected": -0.7297841310501099, |
|
"logps/chosen": -165.22900390625, |
|
"logps/rejected": -152.13014221191406, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.01156298816204071, |
|
"rewards/margins": 0.011255884543061256, |
|
"rewards/rejected": 0.00030710286227986217, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.10660980810234541, |
|
"grad_norm": 5.540011917380718, |
|
"learning_rate": 4.999376538968061e-07, |
|
"logits/chosen": -0.5830188393592834, |
|
"logits/rejected": -0.6362646222114563, |
|
"logps/chosen": -160.86549377441406, |
|
"logps/rejected": -151.5850372314453, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.007453514728695154, |
|
"rewards/margins": 0.0087841572239995, |
|
"rewards/rejected": -0.0013306414475664496, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11194029850746269, |
|
"grad_norm": 5.554459962070174, |
|
"learning_rate": 4.997904683849418e-07, |
|
"logits/chosen": -0.6047431826591492, |
|
"logits/rejected": -0.7156568765640259, |
|
"logps/chosen": -145.95703125, |
|
"logps/rejected": -137.16812133789062, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.003936653956770897, |
|
"rewards/margins": 0.009741699323058128, |
|
"rewards/rejected": -0.013678351417183876, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.11727078891257996, |
|
"grad_norm": 6.6635480922191395, |
|
"learning_rate": 4.99556762539107e-07, |
|
"logits/chosen": -0.5515817403793335, |
|
"logits/rejected": -0.7226412296295166, |
|
"logps/chosen": -163.9252166748047, |
|
"logps/rejected": -151.83767700195312, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.0011928931344300508, |
|
"rewards/margins": 0.012570838443934917, |
|
"rewards/rejected": -0.013763731345534325, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12260127931769722, |
|
"grad_norm": 5.957095370833089, |
|
"learning_rate": 4.992366173083787e-07, |
|
"logits/chosen": -0.586641788482666, |
|
"logits/rejected": -0.7417147159576416, |
|
"logps/chosen": -161.9275360107422, |
|
"logps/rejected": -145.07772827148438, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.003873241599649191, |
|
"rewards/margins": 0.01244218461215496, |
|
"rewards/rejected": -0.01631542667746544, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.1279317697228145, |
|
"grad_norm": 5.703633835475685, |
|
"learning_rate": 4.988301435819852e-07, |
|
"logits/chosen": -0.5778621435165405, |
|
"logits/rejected": -0.6562256217002869, |
|
"logps/chosen": -164.537353515625, |
|
"logps/rejected": -152.1725616455078, |
|
"loss": 0.6845, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.007811696734279394, |
|
"rewards/margins": 0.015073996968567371, |
|
"rewards/rejected": -0.022885693237185478, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13326226012793177, |
|
"grad_norm": 7.3435341527621585, |
|
"learning_rate": 4.983374821508973e-07, |
|
"logits/chosen": -0.6186214685440063, |
|
"logits/rejected": -0.7367585301399231, |
|
"logps/chosen": -190.20452880859375, |
|
"logps/rejected": -183.6043243408203, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.008534837514162064, |
|
"rewards/margins": 0.027956834062933922, |
|
"rewards/rejected": -0.019421998411417007, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.13859275053304904, |
|
"grad_norm": 6.188992862485417, |
|
"learning_rate": 4.977588036590624e-07, |
|
"logits/chosen": -0.6698447465896606, |
|
"logits/rejected": -0.7765822410583496, |
|
"logps/chosen": -157.9294891357422, |
|
"logps/rejected": -146.48617553710938, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.013769884593784809, |
|
"rewards/margins": 0.021815448999404907, |
|
"rewards/rejected": -0.03558532893657684, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1439232409381663, |
|
"grad_norm": 5.78355447268637, |
|
"learning_rate": 4.970943085442984e-07, |
|
"logits/chosen": -0.6052809953689575, |
|
"logits/rejected": -0.768462061882019, |
|
"logps/chosen": -156.30868530273438, |
|
"logps/rejected": -149.22007751464844, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.02559695765376091, |
|
"rewards/margins": 0.03413590043783188, |
|
"rewards/rejected": -0.05973286181688309, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.14925373134328357, |
|
"grad_norm": 5.813443617152644, |
|
"learning_rate": 4.96344226968867e-07, |
|
"logits/chosen": -0.6367892026901245, |
|
"logits/rejected": -0.7320101857185364, |
|
"logps/chosen": -162.81149291992188, |
|
"logps/rejected": -153.95095825195312, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.03205486014485359, |
|
"rewards/margins": 0.017323989421129227, |
|
"rewards/rejected": -0.04937884956598282, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15458422174840086, |
|
"grad_norm": 6.2002717305065, |
|
"learning_rate": 4.955088187397534e-07, |
|
"logits/chosen": -0.7039578557014465, |
|
"logits/rejected": -0.8707769513130188, |
|
"logps/chosen": -176.5757598876953, |
|
"logps/rejected": -168.57083129882812, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04023490846157074, |
|
"rewards/margins": 0.030298087745904922, |
|
"rewards/rejected": -0.07053300738334656, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.15991471215351813, |
|
"grad_norm": 5.905902869206233, |
|
"learning_rate": 4.945883732186751e-07, |
|
"logits/chosen": -0.6456910371780396, |
|
"logits/rejected": -0.8251630067825317, |
|
"logps/chosen": -141.7700653076172, |
|
"logps/rejected": -129.72817993164062, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0674080178141594, |
|
"rewards/margins": 0.03635396808385849, |
|
"rewards/rejected": -0.10376199334859848, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1652452025586354, |
|
"grad_norm": 6.258969756632891, |
|
"learning_rate": 4.935832092218558e-07, |
|
"logits/chosen": -0.724746823310852, |
|
"logits/rejected": -0.842291533946991, |
|
"logps/chosen": -159.5133514404297, |
|
"logps/rejected": -152.9370574951172, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04968777671456337, |
|
"rewards/margins": 0.04536419361829758, |
|
"rewards/rejected": -0.09505197405815125, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 6.094426950794661, |
|
"learning_rate": 4.924936749095969e-07, |
|
"logits/chosen": -0.6918126344680786, |
|
"logits/rejected": -0.7708092331886292, |
|
"logps/chosen": -167.59994506835938, |
|
"logps/rejected": -160.00057983398438, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04972660169005394, |
|
"rewards/margins": 0.05021023750305176, |
|
"rewards/rejected": -0.0999368354678154, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17590618336886993, |
|
"grad_norm": 6.3293548148521905, |
|
"learning_rate": 4.913201476656838e-07, |
|
"logits/chosen": -0.7461433410644531, |
|
"logits/rejected": -0.8420252799987793, |
|
"logps/chosen": -161.7245330810547, |
|
"logps/rejected": -157.357177734375, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.05842015892267227, |
|
"rewards/margins": 0.06748794764280319, |
|
"rewards/rejected": -0.12590810656547546, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1812366737739872, |
|
"grad_norm": 6.5840976852855, |
|
"learning_rate": 4.900630339666717e-07, |
|
"logits/chosen": -0.7366148829460144, |
|
"logits/rejected": -0.8815475702285767, |
|
"logps/chosen": -183.88925170898438, |
|
"logps/rejected": -176.22451782226562, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08390282094478607, |
|
"rewards/margins": 0.05011892318725586, |
|
"rewards/rejected": -0.13402177393436432, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1865671641791045, |
|
"grad_norm": 6.234539356652731, |
|
"learning_rate": 4.88722769241093e-07, |
|
"logits/chosen": -0.6534587144851685, |
|
"logits/rejected": -0.7359489798545837, |
|
"logps/chosen": -156.6703338623047, |
|
"logps/rejected": -150.93853759765625, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07284261286258698, |
|
"rewards/margins": 0.05436049774289131, |
|
"rewards/rejected": -0.127203106880188, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.19189765458422176, |
|
"grad_norm": 6.610956936672055, |
|
"learning_rate": 4.872998177186375e-07, |
|
"logits/chosen": -0.666496992111206, |
|
"logits/rejected": -0.7403326034545898, |
|
"logps/chosen": -160.12982177734375, |
|
"logps/rejected": -157.0307159423828, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.11026358604431152, |
|
"rewards/margins": 0.055197346955537796, |
|
"rewards/rejected": -0.16546092927455902, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19722814498933902, |
|
"grad_norm": 7.023716688535026, |
|
"learning_rate": 4.857946722693566e-07, |
|
"logits/chosen": -0.7221956849098206, |
|
"logits/rejected": -0.9377690553665161, |
|
"logps/chosen": -169.1468048095703, |
|
"logps/rejected": -154.34695434570312, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.09703753888607025, |
|
"rewards/margins": 0.06947065889835358, |
|
"rewards/rejected": -0.16650819778442383, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.2025586353944563, |
|
"grad_norm": 7.14685316792679, |
|
"learning_rate": 4.842078542329463e-07, |
|
"logits/chosen": -0.8102830648422241, |
|
"logits/rejected": -0.892846941947937, |
|
"logps/chosen": -161.26129150390625, |
|
"logps/rejected": -156.14747619628906, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.08616851270198822, |
|
"rewards/margins": 0.06732877343893051, |
|
"rewards/rejected": -0.15349729359149933, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20788912579957355, |
|
"grad_norm": 6.752366375502548, |
|
"learning_rate": 4.825399132381714e-07, |
|
"logits/chosen": -0.6987568736076355, |
|
"logits/rejected": -0.8175935745239258, |
|
"logps/chosen": -171.1442108154297, |
|
"logps/rejected": -166.8987579345703, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.10287340730428696, |
|
"rewards/margins": 0.05335085466504097, |
|
"rewards/rejected": -0.15622428059577942, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.21321961620469082, |
|
"grad_norm": 7.347783152614852, |
|
"learning_rate": 4.807914270124876e-07, |
|
"logits/chosen": -0.7212746143341064, |
|
"logits/rejected": -0.8661853671073914, |
|
"logps/chosen": -154.50978088378906, |
|
"logps/rejected": -151.14486694335938, |
|
"loss": 0.6607, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13088169693946838, |
|
"rewards/margins": 0.07638157904148102, |
|
"rewards/rejected": -0.2072632759809494, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21855010660980811, |
|
"grad_norm": 7.628582115766163, |
|
"learning_rate": 4.789630011819354e-07, |
|
"logits/chosen": -0.8047178387641907, |
|
"logits/rejected": -0.9235810041427612, |
|
"logps/chosen": -172.83328247070312, |
|
"logps/rejected": -167.29969787597656, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.13691547513008118, |
|
"rewards/margins": 0.08130868524312973, |
|
"rewards/rejected": -0.21822413802146912, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.22388059701492538, |
|
"grad_norm": 8.158484143715567, |
|
"learning_rate": 4.770552690613665e-07, |
|
"logits/chosen": -0.6994116902351379, |
|
"logits/rejected": -0.8224090337753296, |
|
"logps/chosen": -165.53271484375, |
|
"logps/rejected": -160.39566040039062, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15383288264274597, |
|
"rewards/margins": 0.08487533777952194, |
|
"rewards/rejected": -0.23870821297168732, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22921108742004265, |
|
"grad_norm": 6.818497799203832, |
|
"learning_rate": 4.750688914350824e-07, |
|
"logits/chosen": -0.7993873357772827, |
|
"logits/rejected": -0.8913162350654602, |
|
"logps/chosen": -168.80471801757812, |
|
"logps/rejected": -161.65472412109375, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.175734743475914, |
|
"rewards/margins": 0.0465971902012825, |
|
"rewards/rejected": -0.2223319262266159, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.2345415778251599, |
|
"grad_norm": 6.951350944403186, |
|
"learning_rate": 4.730045563279577e-07, |
|
"logits/chosen": -0.8045557141304016, |
|
"logits/rejected": -0.9918710589408875, |
|
"logps/chosen": -166.38870239257812, |
|
"logps/rejected": -160.87380981445312, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1676856279373169, |
|
"rewards/margins": 0.08295993506908417, |
|
"rewards/rejected": -0.25064557790756226, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23987206823027718, |
|
"grad_norm": 8.045186231414235, |
|
"learning_rate": 4.708629787671268e-07, |
|
"logits/chosen": -0.7760337591171265, |
|
"logits/rejected": -0.9154524803161621, |
|
"logps/chosen": -176.33999633789062, |
|
"logps/rejected": -174.62783813476562, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.17208227515220642, |
|
"rewards/margins": 0.07876059412956238, |
|
"rewards/rejected": -0.2508428692817688, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.24520255863539445, |
|
"grad_norm": 8.063146353723733, |
|
"learning_rate": 4.6864490053432e-07, |
|
"logits/chosen": -0.8260966539382935, |
|
"logits/rejected": -0.9618522524833679, |
|
"logps/chosen": -181.78347778320312, |
|
"logps/rejected": -169.95745849609375, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.19365951418876648, |
|
"rewards/margins": 0.0855235755443573, |
|
"rewards/rejected": -0.2791830897331238, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2505330490405117, |
|
"grad_norm": 8.049754139869536, |
|
"learning_rate": 4.6635108990893033e-07, |
|
"logits/chosen": -0.7784782648086548, |
|
"logits/rejected": -0.9096555709838867, |
|
"logps/chosen": -183.8284454345703, |
|
"logps/rejected": -179.8243408203125, |
|
"loss": 0.6471, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.22320905327796936, |
|
"rewards/margins": 0.07317076623439789, |
|
"rewards/rejected": -0.29637983441352844, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 7.983403287573849, |
|
"learning_rate": 4.6398234140190413e-07, |
|
"logits/chosen": -0.7042727470397949, |
|
"logits/rejected": -0.8362523317337036, |
|
"logps/chosen": -177.31149291992188, |
|
"logps/rejected": -171.16531372070312, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.2963384687900543, |
|
"rewards/margins": 0.054160721600055695, |
|
"rewards/rejected": -0.3504992127418518, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26119402985074625, |
|
"grad_norm": 8.675362509945584, |
|
"learning_rate": 4.615394754805443e-07, |
|
"logits/chosen": -0.802803635597229, |
|
"logits/rejected": -0.8539141416549683, |
|
"logps/chosen": -185.90289306640625, |
|
"logps/rejected": -192.5127716064453, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.28103405237197876, |
|
"rewards/margins": 0.13215723633766174, |
|
"rewards/rejected": -0.4131912589073181, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.26652452025586354, |
|
"grad_norm": 8.803685601945949, |
|
"learning_rate": 4.5902333828432416e-07, |
|
"logits/chosen": -0.7753912210464478, |
|
"logits/rejected": -0.9024080038070679, |
|
"logps/chosen": -181.60507202148438, |
|
"logps/rejected": -184.08474731445312, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.34952813386917114, |
|
"rewards/margins": 0.10412784665822983, |
|
"rewards/rejected": -0.4536559581756592, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27185501066098083, |
|
"grad_norm": 8.599862072625585, |
|
"learning_rate": 4.5643480133180855e-07, |
|
"logits/chosen": -0.7215537428855896, |
|
"logits/rejected": -0.8429878354072571, |
|
"logps/chosen": -193.21871948242188, |
|
"logps/rejected": -192.76535034179688, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.43663716316223145, |
|
"rewards/margins": 0.09049404412508011, |
|
"rewards/rejected": -0.527131199836731, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2771855010660981, |
|
"grad_norm": 10.219200493862845, |
|
"learning_rate": 4.537747612187848e-07, |
|
"logits/chosen": -0.83184415102005, |
|
"logits/rejected": -1.0026618242263794, |
|
"logps/chosen": -216.73391723632812, |
|
"logps/rejected": -207.87527465820312, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.4637986123561859, |
|
"rewards/margins": 0.050548046827316284, |
|
"rewards/rejected": -0.5143465995788574, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28251599147121537, |
|
"grad_norm": 9.931121908912841, |
|
"learning_rate": 4.510441393077069e-07, |
|
"logits/chosen": -0.8512382507324219, |
|
"logits/rejected": -1.0560386180877686, |
|
"logps/chosen": -200.3084259033203, |
|
"logps/rejected": -195.31561279296875, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4075559675693512, |
|
"rewards/margins": 0.09888825565576553, |
|
"rewards/rejected": -0.5064442157745361, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2878464818763326, |
|
"grad_norm": 8.859652819595963, |
|
"learning_rate": 4.4824388140856194e-07, |
|
"logits/chosen": -0.8754502534866333, |
|
"logits/rejected": -1.0318089723587036, |
|
"logps/chosen": -198.41824340820312, |
|
"logps/rejected": -189.89077758789062, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3337915539741516, |
|
"rewards/margins": 0.13341596722602844, |
|
"rewards/rejected": -0.46720752120018005, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2931769722814499, |
|
"grad_norm": 9.06466635441445, |
|
"learning_rate": 4.453749574512685e-07, |
|
"logits/chosen": -0.9197045564651489, |
|
"logits/rejected": -1.0155677795410156, |
|
"logps/chosen": -185.05616760253906, |
|
"logps/rejected": -186.17381286621094, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.36515626311302185, |
|
"rewards/margins": 0.09796006977558136, |
|
"rewards/rejected": -0.4631163477897644, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.29850746268656714, |
|
"grad_norm": 8.989140378395783, |
|
"learning_rate": 4.4243836114972003e-07, |
|
"logits/chosen": -0.8735504150390625, |
|
"logits/rejected": -1.004237413406372, |
|
"logps/chosen": -186.00570678710938, |
|
"logps/rejected": -192.78012084960938, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3774046003818512, |
|
"rewards/margins": 0.15985320508480072, |
|
"rewards/rejected": -0.5372577905654907, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.30383795309168443, |
|
"grad_norm": 8.593968658418248, |
|
"learning_rate": 4.3943510965759113e-07, |
|
"logits/chosen": -0.9259954690933228, |
|
"logits/rejected": -1.008984088897705, |
|
"logps/chosen": -196.23764038085938, |
|
"logps/rejected": -200.01434326171875, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3551446795463562, |
|
"rewards/margins": 0.15665681660175323, |
|
"rewards/rejected": -0.5118014812469482, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.3091684434968017, |
|
"grad_norm": 8.920829639132457, |
|
"learning_rate": 4.3636624321602354e-07, |
|
"logits/chosen": -0.9114233255386353, |
|
"logits/rejected": -1.0022578239440918, |
|
"logps/chosen": -199.855712890625, |
|
"logps/rejected": -202.50045776367188, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.3673010468482971, |
|
"rewards/margins": 0.11292729526758194, |
|
"rewards/rejected": -0.48022833466529846, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31449893390191896, |
|
"grad_norm": 9.061291327996578, |
|
"learning_rate": 4.3323282479331713e-07, |
|
"logits/chosen": -0.8595677614212036, |
|
"logits/rejected": -0.9653046727180481, |
|
"logps/chosen": -215.55239868164062, |
|
"logps/rejected": -208.42080688476562, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.4611906409263611, |
|
"rewards/margins": 0.046931833028793335, |
|
"rewards/rejected": -0.508122444152832, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.31982942430703626, |
|
"grad_norm": 8.932259281056975, |
|
"learning_rate": 4.300359397167469e-07, |
|
"logits/chosen": -0.899543285369873, |
|
"logits/rejected": -1.058935523033142, |
|
"logps/chosen": -221.4969024658203, |
|
"logps/rejected": -217.5653076171875, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3796769082546234, |
|
"rewards/margins": 0.15340924263000488, |
|
"rewards/rejected": -0.5330861806869507, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3251599147121535, |
|
"grad_norm": 10.69745472768232, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -0.7874996066093445, |
|
"logits/rejected": -0.9376864433288574, |
|
"logps/chosen": -175.2811279296875, |
|
"logps/rejected": -174.26589965820312, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3251371383666992, |
|
"rewards/margins": 0.14439386129379272, |
|
"rewards/rejected": -0.46953099966049194, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.3304904051172708, |
|
"grad_norm": 9.502534309876182, |
|
"learning_rate": 4.2345622044281914e-07, |
|
"logits/chosen": -0.8365820646286011, |
|
"logits/rejected": -0.9602219462394714, |
|
"logps/chosen": -198.43325805664062, |
|
"logps/rejected": -202.5500946044922, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.44264811277389526, |
|
"rewards/margins": 0.15877890586853027, |
|
"rewards/rejected": -0.6014270186424255, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3358208955223881, |
|
"grad_norm": 10.39334291807327, |
|
"learning_rate": 4.200756652736115e-07, |
|
"logits/chosen": -0.8717101812362671, |
|
"logits/rejected": -0.9584082365036011, |
|
"logps/chosen": -212.91275024414062, |
|
"logps/rejected": -228.49215698242188, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.48386502265930176, |
|
"rewards/margins": 0.22382013499736786, |
|
"rewards/rejected": -0.7076851725578308, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 9.34889686698433, |
|
"learning_rate": 4.1663620071744896e-07, |
|
"logits/chosen": -0.8714283108711243, |
|
"logits/rejected": -0.9042676091194153, |
|
"logps/chosen": -184.66299438476562, |
|
"logps/rejected": -192.072021484375, |
|
"loss": 0.6433, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.47013959288597107, |
|
"rewards/margins": 0.11798025667667389, |
|
"rewards/rejected": -0.5881198644638062, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3464818763326226, |
|
"grad_norm": 8.959638666781437, |
|
"learning_rate": 4.131390181073076e-07, |
|
"logits/chosen": -0.8877362012863159, |
|
"logits/rejected": -1.0048226118087769, |
|
"logps/chosen": -208.8312530517578, |
|
"logps/rejected": -213.86831665039062, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.397031307220459, |
|
"rewards/margins": 0.15148170292377472, |
|
"rewards/rejected": -0.5485130548477173, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.35181236673773986, |
|
"grad_norm": 9.592550539528885, |
|
"learning_rate": 4.0958532876806036e-07, |
|
"logits/chosen": -0.8785327076911926, |
|
"logits/rejected": -0.9449760317802429, |
|
"logps/chosen": -222.5094757080078, |
|
"logps/rejected": -226.78274536132812, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5226814150810242, |
|
"rewards/margins": 0.08113773167133331, |
|
"rewards/rejected": -0.6038191914558411, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 10.432646240734098, |
|
"learning_rate": 4.0597636359690854e-07, |
|
"logits/chosen": -0.927719235420227, |
|
"logits/rejected": -1.0275365114212036, |
|
"logps/chosen": -223.2044219970703, |
|
"logps/rejected": -225.4947967529297, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.5208097696304321, |
|
"rewards/margins": 0.20672473311424255, |
|
"rewards/rejected": -0.7275345325469971, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3624733475479744, |
|
"grad_norm": 11.479351717352401, |
|
"learning_rate": 4.023133726370341e-07, |
|
"logits/chosen": -0.9192001223564148, |
|
"logits/rejected": -1.064570426940918, |
|
"logps/chosen": -218.15878295898438, |
|
"logps/rejected": -227.5740966796875, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5721922516822815, |
|
"rewards/margins": 0.18816125392913818, |
|
"rewards/rejected": -0.7603535056114197, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3678038379530917, |
|
"grad_norm": 9.752374588486973, |
|
"learning_rate": 3.9859762464461986e-07, |
|
"logits/chosen": -0.9149691462516785, |
|
"logits/rejected": -0.9972041845321655, |
|
"logps/chosen": -226.1667938232422, |
|
"logps/rejected": -231.6415557861328, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.5891625285148621, |
|
"rewards/margins": 0.15944533050060272, |
|
"rewards/rejected": -0.7486079931259155, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.373134328358209, |
|
"grad_norm": 10.686589782660716, |
|
"learning_rate": 3.9483040664938844e-07, |
|
"logits/chosen": -0.9657170176506042, |
|
"logits/rejected": -1.0521764755249023, |
|
"logps/chosen": -222.5596160888672, |
|
"logps/rejected": -228.71206665039062, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5828268527984619, |
|
"rewards/margins": 0.16033609211444855, |
|
"rewards/rejected": -0.7431629300117493, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3784648187633262, |
|
"grad_norm": 11.539842635377966, |
|
"learning_rate": 3.910130235088118e-07, |
|
"logits/chosen": -0.9302359819412231, |
|
"logits/rejected": -0.9725440740585327, |
|
"logps/chosen": -218.52835083007812, |
|
"logps/rejected": -236.67294311523438, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.6089349389076233, |
|
"rewards/margins": 0.1914242058992386, |
|
"rewards/rejected": -0.8003591299057007, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.3837953091684435, |
|
"grad_norm": 11.169667795718752, |
|
"learning_rate": 3.8714679745614556e-07, |
|
"logits/chosen": -0.9223200082778931, |
|
"logits/rejected": -1.0504696369171143, |
|
"logps/chosen": -218.5863037109375, |
|
"logps/rejected": -221.20297241210938, |
|
"loss": 0.6276, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6130382418632507, |
|
"rewards/margins": 0.17048679292201996, |
|
"rewards/rejected": -0.7835251092910767, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38912579957356075, |
|
"grad_norm": 11.400952450487356, |
|
"learning_rate": 3.8323306764244445e-07, |
|
"logits/chosen": -0.8188157081604004, |
|
"logits/rejected": -0.9803248643875122, |
|
"logps/chosen": -235.3826904296875, |
|
"logps/rejected": -230.9711456298828, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6885900497436523, |
|
"rewards/margins": 0.15824738144874573, |
|
"rewards/rejected": -0.8468375205993652, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.39445628997867804, |
|
"grad_norm": 12.170280653967604, |
|
"learning_rate": 3.792731896727196e-07, |
|
"logits/chosen": -0.9494584798812866, |
|
"logits/rejected": -0.9925413131713867, |
|
"logps/chosen": -215.47842407226562, |
|
"logps/rejected": -234.8311767578125, |
|
"loss": 0.6269, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6851028203964233, |
|
"rewards/margins": 0.2173648625612259, |
|
"rewards/rejected": -0.9024677276611328, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3997867803837953, |
|
"grad_norm": 10.692294507184064, |
|
"learning_rate": 3.752685351363937e-07, |
|
"logits/chosen": -0.9608640670776367, |
|
"logits/rejected": -1.0993045568466187, |
|
"logps/chosen": -240.19479370117188, |
|
"logps/rejected": -244.61849975585938, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.772241473197937, |
|
"rewards/margins": 0.1992679387331009, |
|
"rewards/rejected": -0.9715094566345215, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.4051172707889126, |
|
"grad_norm": 10.604201029450365, |
|
"learning_rate": 3.712204911322228e-07, |
|
"logits/chosen": -0.8940795660018921, |
|
"logits/rejected": -1.0112766027450562, |
|
"logps/chosen": -226.2286376953125, |
|
"logps/rejected": -236.16824340820312, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7305911779403687, |
|
"rewards/margins": 0.17177362740039825, |
|
"rewards/rejected": -0.9023649096488953, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.41044776119402987, |
|
"grad_norm": 11.725383803893557, |
|
"learning_rate": 3.671304597878437e-07, |
|
"logits/chosen": -0.8430676460266113, |
|
"logits/rejected": -0.9990310668945312, |
|
"logps/chosen": -226.9669189453125, |
|
"logps/rejected": -233.7083740234375, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7214430570602417, |
|
"rewards/margins": 0.18212191760540009, |
|
"rewards/rejected": -0.9035650491714478, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.4157782515991471, |
|
"grad_norm": 11.91435588205294, |
|
"learning_rate": 3.629998577741174e-07, |
|
"logits/chosen": -0.9615923762321472, |
|
"logits/rejected": -1.166372537612915, |
|
"logps/chosen": -227.1701202392578, |
|
"logps/rejected": -231.6777801513672, |
|
"loss": 0.6179, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6583755016326904, |
|
"rewards/margins": 0.21044659614562988, |
|
"rewards/rejected": -0.8688220977783203, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4211087420042644, |
|
"grad_norm": 11.584436951407874, |
|
"learning_rate": 3.588301158144338e-07, |
|
"logits/chosen": -0.9084697961807251, |
|
"logits/rejected": -0.9693692922592163, |
|
"logps/chosen": -245.2818145751953, |
|
"logps/rejected": -247.6303253173828, |
|
"loss": 0.6356, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.822106659412384, |
|
"rewards/margins": 0.17483489215373993, |
|
"rewards/rejected": -0.9969415664672852, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 12.023549452448254, |
|
"learning_rate": 3.546226781891501e-07, |
|
"logits/chosen": -0.8738770484924316, |
|
"logits/rejected": -1.0339401960372925, |
|
"logps/chosen": -238.5256805419922, |
|
"logps/rejected": -247.3977813720703, |
|
"loss": 0.6265, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.713554322719574, |
|
"rewards/margins": 0.22547940909862518, |
|
"rewards/rejected": -0.9390336871147156, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42643923240938164, |
|
"eval_logits/chosen": -1.4091081619262695, |
|
"eval_logits/rejected": -1.3678923845291138, |
|
"eval_logps/chosen": -229.8961181640625, |
|
"eval_logps/rejected": -245.27667236328125, |
|
"eval_loss": 0.6455010771751404, |
|
"eval_rewards/accuracies": 0.6504064798355103, |
|
"eval_rewards/chosen": -0.7831487059593201, |
|
"eval_rewards/margins": 0.165547713637352, |
|
"eval_rewards/rejected": -0.9486963748931885, |
|
"eval_runtime": 167.4485, |
|
"eval_samples_per_second": 11.717, |
|
"eval_steps_per_second": 1.469, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43176972281449894, |
|
"grad_norm": 11.60850533610325, |
|
"learning_rate": 3.5037900223533325e-07, |
|
"logits/chosen": -0.9261396527290344, |
|
"logits/rejected": -1.0748217105865479, |
|
"logps/chosen": -221.2123565673828, |
|
"logps/rejected": -228.6689910888672, |
|
"loss": 0.5935, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6648604273796082, |
|
"rewards/margins": 0.26066452264785767, |
|
"rewards/rejected": -0.925524890422821, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.43710021321961623, |
|
"grad_norm": 11.18634804714827, |
|
"learning_rate": 3.461005578419791e-07, |
|
"logits/chosen": -0.8335205316543579, |
|
"logits/rejected": -0.9212998151779175, |
|
"logps/chosen": -244.4559783935547, |
|
"logps/rejected": -249.6932830810547, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8230158090591431, |
|
"rewards/margins": 0.19074369966983795, |
|
"rewards/rejected": -1.0137594938278198, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.44243070362473347, |
|
"grad_norm": 12.840173089928633, |
|
"learning_rate": 3.4178882694088507e-07, |
|
"logits/chosen": -0.9584044218063354, |
|
"logits/rejected": -1.1188139915466309, |
|
"logps/chosen": -230.2344970703125, |
|
"logps/rejected": -227.662841796875, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7208009958267212, |
|
"rewards/margins": 0.14610765874385834, |
|
"rewards/rejected": -0.8669085502624512, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.44776119402985076, |
|
"grad_norm": 12.162616519674994, |
|
"learning_rate": 3.374453029933509e-07, |
|
"logits/chosen": -0.972398579120636, |
|
"logits/rejected": -1.1250841617584229, |
|
"logps/chosen": -232.1986541748047, |
|
"logps/rejected": -250.6172332763672, |
|
"loss": 0.6009, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5975956320762634, |
|
"rewards/margins": 0.4015510678291321, |
|
"rewards/rejected": -0.9991466403007507, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.453091684434968, |
|
"grad_norm": 11.14473654749687, |
|
"learning_rate": 3.3307149047288575e-07, |
|
"logits/chosen": -0.9900253415107727, |
|
"logits/rejected": -1.084149718284607, |
|
"logps/chosen": -239.5063018798828, |
|
"logps/rejected": -251.7197723388672, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7624539732933044, |
|
"rewards/margins": 0.22666020691394806, |
|
"rewards/rejected": -0.9891141653060913, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.4584221748400853, |
|
"grad_norm": 12.058706301647955, |
|
"learning_rate": 3.286689043441015e-07, |
|
"logits/chosen": -0.9329894185066223, |
|
"logits/rejected": -1.0619364976882935, |
|
"logps/chosen": -246.6885223388672, |
|
"logps/rejected": -256.63360595703125, |
|
"loss": 0.6082, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8027140498161316, |
|
"rewards/margins": 0.26076698303222656, |
|
"rewards/rejected": -1.063481092453003, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46375266524520253, |
|
"grad_norm": 13.080197907672602, |
|
"learning_rate": 3.2423906953797207e-07, |
|
"logits/chosen": -0.8946924209594727, |
|
"logits/rejected": -0.9567023515701294, |
|
"logps/chosen": -211.9040069580078, |
|
"logps/rejected": -230.66299438476562, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.698184609413147, |
|
"rewards/margins": 0.28591758012771606, |
|
"rewards/rejected": -0.9841020703315735, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4690831556503198, |
|
"grad_norm": 10.950896035507196, |
|
"learning_rate": 3.197835204236402e-07, |
|
"logits/chosen": -1.0192838907241821, |
|
"logits/rejected": -1.1130427122116089, |
|
"logps/chosen": -241.8474578857422, |
|
"logps/rejected": -262.25335693359375, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.860467791557312, |
|
"rewards/margins": 0.19941401481628418, |
|
"rewards/rejected": -1.0598818063735962, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4744136460554371, |
|
"grad_norm": 12.737513715694588, |
|
"learning_rate": 3.153038002769558e-07, |
|
"logits/chosen": -0.9327136874198914, |
|
"logits/rejected": -1.030767560005188, |
|
"logps/chosen": -248.5272979736328, |
|
"logps/rejected": -258.75872802734375, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8692724108695984, |
|
"rewards/margins": 0.2143835574388504, |
|
"rewards/rejected": -1.0836559534072876, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.47974413646055436, |
|
"grad_norm": 15.296734055580355, |
|
"learning_rate": 3.1080146074592877e-07, |
|
"logits/chosen": -0.9727839231491089, |
|
"logits/rejected": -1.0751718282699585, |
|
"logps/chosen": -241.76846313476562, |
|
"logps/rejected": -251.7017364501953, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8884710073471069, |
|
"rewards/margins": 0.22023312747478485, |
|
"rewards/rejected": -1.1087043285369873, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48507462686567165, |
|
"grad_norm": 11.780615719126844, |
|
"learning_rate": 3.0627806131328246e-07, |
|
"logits/chosen": -0.9416291117668152, |
|
"logits/rejected": -1.0612024068832397, |
|
"logps/chosen": -235.45767211914062, |
|
"logps/rejected": -247.3424530029297, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8884525299072266, |
|
"rewards/margins": 0.24100270867347717, |
|
"rewards/rejected": -1.1294552087783813, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4904051172707889, |
|
"grad_norm": 13.447809886194765, |
|
"learning_rate": 3.017351687562928e-07, |
|
"logits/chosen": -1.0132644176483154, |
|
"logits/rejected": -1.1040401458740234, |
|
"logps/chosen": -247.7209014892578, |
|
"logps/rejected": -248.7446746826172, |
|
"loss": 0.6152, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9005931615829468, |
|
"rewards/margins": 0.11596985161304474, |
|
"rewards/rejected": -1.016563057899475, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4957356076759062, |
|
"grad_norm": 13.620020353000207, |
|
"learning_rate": 2.971743566041009e-07, |
|
"logits/chosen": -1.0589954853057861, |
|
"logits/rejected": -1.066146731376648, |
|
"logps/chosen": -247.865478515625, |
|
"logps/rejected": -260.4234313964844, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8577459454536438, |
|
"rewards/margins": 0.17027950286865234, |
|
"rewards/rejected": -1.0280256271362305, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.5010660980810234, |
|
"grad_norm": 12.958251619951241, |
|
"learning_rate": 2.925972045926878e-07, |
|
"logits/chosen": -0.9736588597297668, |
|
"logits/rejected": -1.0960971117019653, |
|
"logps/chosen": -220.01126098632812, |
|
"logps/rejected": -241.4611358642578, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7264107465744019, |
|
"rewards/margins": 0.30126145482063293, |
|
"rewards/rejected": -1.0276721715927124, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5063965884861408, |
|
"grad_norm": 13.3140639310756, |
|
"learning_rate": 2.880052981176979e-07, |
|
"logits/chosen": -0.9312192797660828, |
|
"logits/rejected": -1.0160053968429565, |
|
"logps/chosen": -222.43057250976562, |
|
"logps/rejected": -225.4202423095703, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8433539271354675, |
|
"rewards/margins": 0.1893020123243332, |
|
"rewards/rejected": -1.032655954360962, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 10.478988610995284, |
|
"learning_rate": 2.83400227685304e-07, |
|
"logits/chosen": -0.9926323890686035, |
|
"logits/rejected": -1.1106306314468384, |
|
"logps/chosen": -249.5770263671875, |
|
"logps/rejected": -254.35794067382812, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.795128345489502, |
|
"rewards/margins": 0.20585620403289795, |
|
"rewards/rejected": -1.0009845495224, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5170575692963753, |
|
"grad_norm": 13.398290532585486, |
|
"learning_rate": 2.7878358836129984e-07, |
|
"logits/chosen": -1.035072922706604, |
|
"logits/rejected": -1.1353219747543335, |
|
"logps/chosen": -231.4602508544922, |
|
"logps/rejected": -246.7826690673828, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7928118705749512, |
|
"rewards/margins": 0.24832260608673096, |
|
"rewards/rejected": -1.0411344766616821, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.5223880597014925, |
|
"grad_norm": 17.87280922700601, |
|
"learning_rate": 2.7415697921861525e-07, |
|
"logits/chosen": -0.9991563558578491, |
|
"logits/rejected": -1.1898800134658813, |
|
"logps/chosen": -280.6573181152344, |
|
"logps/rejected": -277.5446472167969, |
|
"loss": 0.6233, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.0797468423843384, |
|
"rewards/margins": 0.14736375212669373, |
|
"rewards/rejected": -1.2271106243133545, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5277185501066098, |
|
"grad_norm": 14.03763690100759, |
|
"learning_rate": 2.6952200278344253e-07, |
|
"logits/chosen": -0.8905277252197266, |
|
"logits/rejected": -1.083092451095581, |
|
"logps/chosen": -234.2960662841797, |
|
"logps/rejected": -247.935791015625, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8255411982536316, |
|
"rewards/margins": 0.22554175555706024, |
|
"rewards/rejected": -1.051082968711853, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.5330490405117271, |
|
"grad_norm": 13.488186802243888, |
|
"learning_rate": 2.6488026448016686e-07, |
|
"logits/chosen": -1.000211238861084, |
|
"logits/rejected": -1.137927770614624, |
|
"logps/chosen": -260.8866882324219, |
|
"logps/rejected": -279.22772216796875, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9299663305282593, |
|
"rewards/margins": 0.32078418135643005, |
|
"rewards/rejected": -1.2507504224777222, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5383795309168443, |
|
"grad_norm": 13.891894437147359, |
|
"learning_rate": 2.602333720752927e-07, |
|
"logits/chosen": -1.0730583667755127, |
|
"logits/rejected": -1.1127971410751343, |
|
"logps/chosen": -249.81387329101562, |
|
"logps/rejected": -281.8894958496094, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9583339691162109, |
|
"rewards/margins": 0.47916507720947266, |
|
"rewards/rejected": -1.4374990463256836, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.5437100213219617, |
|
"grad_norm": 13.78432594287455, |
|
"learning_rate": 2.5558293512055923e-07, |
|
"logits/chosen": -0.9855419397354126, |
|
"logits/rejected": -1.0839966535568237, |
|
"logps/chosen": -259.2463684082031, |
|
"logps/rejected": -281.6179504394531, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9760101437568665, |
|
"rewards/margins": 0.3508725166320801, |
|
"rewards/rejected": -1.3268824815750122, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5490405117270789, |
|
"grad_norm": 13.647127313830234, |
|
"learning_rate": 2.509305643954369e-07, |
|
"logits/chosen": -1.0535143613815308, |
|
"logits/rejected": -1.2141129970550537, |
|
"logps/chosen": -235.3438720703125, |
|
"logps/rejected": -240.82583618164062, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8737846612930298, |
|
"rewards/margins": 0.20960083603858948, |
|
"rewards/rejected": -1.0833853483200073, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.5543710021321961, |
|
"grad_norm": 12.039038135090694, |
|
"learning_rate": 2.4627787134919946e-07, |
|
"logits/chosen": -0.9818887710571289, |
|
"logits/rejected": -1.1572598218917847, |
|
"logps/chosen": -270.9326171875, |
|
"logps/rejected": -290.0334167480469, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9136640429496765, |
|
"rewards/margins": 0.3413304388523102, |
|
"rewards/rejected": -1.254994511604309, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5597014925373134, |
|
"grad_norm": 12.903483427378369, |
|
"learning_rate": 2.41626467542764e-07, |
|
"logits/chosen": -0.9903634190559387, |
|
"logits/rejected": -1.0747687816619873, |
|
"logps/chosen": -246.91549682617188, |
|
"logps/rejected": -270.34954833984375, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9555915594100952, |
|
"rewards/margins": 0.39527803659439087, |
|
"rewards/rejected": -1.3508695363998413, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.5650319829424307, |
|
"grad_norm": 13.018513420276996, |
|
"learning_rate": 2.369779640904909e-07, |
|
"logits/chosen": -1.0167109966278076, |
|
"logits/rejected": -1.1171668767929077, |
|
"logps/chosen": -260.72833251953125, |
|
"logps/rejected": -275.2940673828125, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9983251690864563, |
|
"rewards/margins": 0.2603410482406616, |
|
"rewards/rejected": -1.2586661577224731, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.570362473347548, |
|
"grad_norm": 14.17080589691093, |
|
"learning_rate": 2.3233397110214044e-07, |
|
"logits/chosen": -1.114485740661621, |
|
"logits/rejected": -1.2273226976394653, |
|
"logps/chosen": -267.9627380371094, |
|
"logps/rejected": -286.55926513671875, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9473945498466492, |
|
"rewards/margins": 0.29965347051620483, |
|
"rewards/rejected": -1.247048020362854, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.5756929637526652, |
|
"grad_norm": 14.907759901462226, |
|
"learning_rate": 2.2769609712517602e-07, |
|
"logits/chosen": -1.051343560218811, |
|
"logits/rejected": -1.112343668937683, |
|
"logps/chosen": -282.5835266113281, |
|
"logps/rejected": -286.35833740234375, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1061525344848633, |
|
"rewards/margins": 0.1019170731306076, |
|
"rewards/rejected": -1.2080695629119873, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5810234541577826, |
|
"grad_norm": 14.332154011748669, |
|
"learning_rate": 2.2306594858760898e-07, |
|
"logits/chosen": -0.9886674880981445, |
|
"logits/rejected": -1.1096317768096924, |
|
"logps/chosen": -264.4873046875, |
|
"logps/rejected": -288.09381103515625, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9765904545783997, |
|
"rewards/margins": 0.43369174003601074, |
|
"rewards/rejected": -1.4102822542190552, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5863539445628998, |
|
"grad_norm": 14.364439575082633, |
|
"learning_rate": 2.184451292415778e-07, |
|
"logits/chosen": -1.0294235944747925, |
|
"logits/rejected": -1.0585139989852905, |
|
"logps/chosen": -229.28250122070312, |
|
"logps/rejected": -251.99560546875, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8699715733528137, |
|
"rewards/margins": 0.3365539610385895, |
|
"rewards/rejected": -1.2065255641937256, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.591684434968017, |
|
"grad_norm": 14.37697025208913, |
|
"learning_rate": 2.1383523960785342e-07, |
|
"logits/chosen": -1.0905894041061401, |
|
"logits/rejected": -1.2189487218856812, |
|
"logps/chosen": -245.4988250732422, |
|
"logps/rejected": -253.37771606445312, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8295791745185852, |
|
"rewards/margins": 0.25238728523254395, |
|
"rewards/rejected": -1.0819664001464844, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 13.273086730559621, |
|
"learning_rate": 2.0923787642146434e-07, |
|
"logits/chosen": -0.9458072781562805, |
|
"logits/rejected": -1.0978261232376099, |
|
"logps/chosen": -216.93496704101562, |
|
"logps/rejected": -238.4340362548828, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8222309947013855, |
|
"rewards/margins": 0.3146277964115143, |
|
"rewards/rejected": -1.1368588209152222, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6023454157782516, |
|
"grad_norm": 12.275413565116182, |
|
"learning_rate": 2.046546320786331e-07, |
|
"logits/chosen": -1.0852059125900269, |
|
"logits/rejected": -1.217184066772461, |
|
"logps/chosen": -243.6894989013672, |
|
"logps/rejected": -254.8353271484375, |
|
"loss": 0.6099, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8728886842727661, |
|
"rewards/margins": 0.2236359417438507, |
|
"rewards/rejected": -1.0965244770050049, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.6076759061833689, |
|
"grad_norm": 13.385563460718537, |
|
"learning_rate": 2.0008709408521507e-07, |
|
"logits/chosen": -1.075157880783081, |
|
"logits/rejected": -1.1313838958740234, |
|
"logps/chosen": -230.10983276367188, |
|
"logps/rejected": -256.946044921875, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7323789000511169, |
|
"rewards/margins": 0.291358083486557, |
|
"rewards/rejected": -1.0237371921539307, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6130063965884861, |
|
"grad_norm": 12.904686522939175, |
|
"learning_rate": 1.9553684450683193e-07, |
|
"logits/chosen": -1.092653512954712, |
|
"logits/rejected": -1.1976040601730347, |
|
"logps/chosen": -237.1493682861328, |
|
"logps/rejected": -256.56890869140625, |
|
"loss": 0.6163, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8681387901306152, |
|
"rewards/margins": 0.29039478302001953, |
|
"rewards/rejected": -1.1585334539413452, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.6183368869936035, |
|
"grad_norm": 18.21477968462917, |
|
"learning_rate": 1.9100545942088848e-07, |
|
"logits/chosen": -1.0292062759399414, |
|
"logits/rejected": -1.1282401084899902, |
|
"logps/chosen": -222.6328582763672, |
|
"logps/rejected": -251.3138885498047, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8912476301193237, |
|
"rewards/margins": 0.3392987847328186, |
|
"rewards/rejected": -1.2305463552474976, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6236673773987207, |
|
"grad_norm": 13.078178032971321, |
|
"learning_rate": 1.8649450837066444e-07, |
|
"logits/chosen": -1.1086572408676147, |
|
"logits/rejected": -1.2702162265777588, |
|
"logps/chosen": -237.74673461914062, |
|
"logps/rejected": -256.3179626464844, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7724729776382446, |
|
"rewards/margins": 0.34164559841156006, |
|
"rewards/rejected": -1.1141188144683838, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.6289978678038379, |
|
"grad_norm": 14.54157536622139, |
|
"learning_rate": 1.8200555382166898e-07, |
|
"logits/chosen": -1.036029577255249, |
|
"logits/rejected": -1.1076552867889404, |
|
"logps/chosen": -257.90423583984375, |
|
"logps/rejected": -276.80859375, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9389132261276245, |
|
"rewards/margins": 0.3674803078174591, |
|
"rewards/rejected": -1.3063933849334717, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6343283582089553, |
|
"grad_norm": 13.963862396350292, |
|
"learning_rate": 1.775401506204472e-07, |
|
"logits/chosen": -1.0365560054779053, |
|
"logits/rejected": -1.111011266708374, |
|
"logps/chosen": -249.289794921875, |
|
"logps/rejected": -261.4861755371094, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9952207803726196, |
|
"rewards/margins": 0.24101737141609192, |
|
"rewards/rejected": -1.2362381219863892, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.6396588486140725, |
|
"grad_norm": 15.013492823138591, |
|
"learning_rate": 1.7309984545602528e-07, |
|
"logits/chosen": -1.160706877708435, |
|
"logits/rejected": -1.2153781652450562, |
|
"logps/chosen": -283.8118896484375, |
|
"logps/rejected": -309.1605529785156, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2339740991592407, |
|
"rewards/margins": 0.33963102102279663, |
|
"rewards/rejected": -1.5736052989959717, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6449893390191898, |
|
"grad_norm": 12.416383191561394, |
|
"learning_rate": 1.6868617632418114e-07, |
|
"logits/chosen": -1.1419028043746948, |
|
"logits/rejected": -1.2748745679855347, |
|
"logps/chosen": -278.46051025390625, |
|
"logps/rejected": -305.10687255859375, |
|
"loss": 0.5995, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.106767177581787, |
|
"rewards/margins": 0.3402232229709625, |
|
"rewards/rejected": -1.4469903707504272, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.650319829424307, |
|
"grad_norm": 13.316722698785181, |
|
"learning_rate": 1.6430067199472657e-07, |
|
"logits/chosen": -1.0973302125930786, |
|
"logits/rejected": -1.1705577373504639, |
|
"logps/chosen": -238.37857055664062, |
|
"logps/rejected": -260.87811279296875, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9219633936882019, |
|
"rewards/margins": 0.3023082911968231, |
|
"rewards/rejected": -1.224271535873413, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6556503198294243, |
|
"grad_norm": 14.96464401167362, |
|
"learning_rate": 1.599448514819844e-07, |
|
"logits/chosen": -1.1169979572296143, |
|
"logits/rejected": -1.2400496006011963, |
|
"logps/chosen": -251.90786743164062, |
|
"logps/rejected": -275.16632080078125, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9491860270500183, |
|
"rewards/margins": 0.33866086602211, |
|
"rewards/rejected": -1.2878468036651611, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.6609808102345416, |
|
"grad_norm": 14.725406377953089, |
|
"learning_rate": 1.5562022351864534e-07, |
|
"logits/chosen": -1.0977303981781006, |
|
"logits/rejected": -1.129206657409668, |
|
"logps/chosen": -240.2178955078125, |
|
"logps/rejected": -280.0063171386719, |
|
"loss": 0.5894, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.890995979309082, |
|
"rewards/margins": 0.4513840079307556, |
|
"rewards/rejected": -1.3423799276351929, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6663113006396588, |
|
"grad_norm": 12.844371576008434, |
|
"learning_rate": 1.5132828603318577e-07, |
|
"logits/chosen": -1.0323293209075928, |
|
"logits/rejected": -1.139953374862671, |
|
"logps/chosen": -255.4688720703125, |
|
"logps/rejected": -270.874755859375, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0522164106369019, |
|
"rewards/margins": 0.21118326485157013, |
|
"rewards/rejected": -1.2633997201919556, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6716417910447762, |
|
"grad_norm": 17.421764111517, |
|
"learning_rate": 1.4707052563102748e-07, |
|
"logits/chosen": -1.0614488124847412, |
|
"logits/rejected": -1.1523784399032593, |
|
"logps/chosen": -246.06497192382812, |
|
"logps/rejected": -260.67041015625, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0199403762817383, |
|
"rewards/margins": 0.25652509927749634, |
|
"rewards/rejected": -1.2764654159545898, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6769722814498934, |
|
"grad_norm": 11.820948981122326, |
|
"learning_rate": 1.4284841707961987e-07, |
|
"logits/chosen": -1.1177728176116943, |
|
"logits/rejected": -1.2426093816757202, |
|
"logps/chosen": -238.572509765625, |
|
"logps/rejected": -274.74896240234375, |
|
"loss": 0.5731, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9242167472839355, |
|
"rewards/margins": 0.4471352994441986, |
|
"rewards/rejected": -1.371351957321167, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 16.810425880201876, |
|
"learning_rate": 1.386634227976224e-07, |
|
"logits/chosen": -1.0978498458862305, |
|
"logits/rejected": -1.1455624103546143, |
|
"logps/chosen": -252.0463409423828, |
|
"logps/rejected": -267.16900634765625, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.01809561252594, |
|
"rewards/margins": 0.2102915495634079, |
|
"rewards/rejected": -1.2283871173858643, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6876332622601279, |
|
"grad_norm": 16.16875613964214, |
|
"learning_rate": 1.345169923483642e-07, |
|
"logits/chosen": -1.074209451675415, |
|
"logits/rejected": -1.0901873111724854, |
|
"logps/chosen": -242.7150421142578, |
|
"logps/rejected": -255.5235595703125, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9898387789726257, |
|
"rewards/margins": 0.1817895770072937, |
|
"rewards/rejected": -1.1716282367706299, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6929637526652452, |
|
"grad_norm": 14.889847230913357, |
|
"learning_rate": 1.3041056193775665e-07, |
|
"logits/chosen": -1.1271008253097534, |
|
"logits/rejected": -1.2666515111923218, |
|
"logps/chosen": -270.30523681640625, |
|
"logps/rejected": -305.54791259765625, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0262703895568848, |
|
"rewards/margins": 0.49466007947921753, |
|
"rewards/rejected": -1.520930528640747, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6982942430703625, |
|
"grad_norm": 17.172517125459027, |
|
"learning_rate": 1.2634555391683188e-07, |
|
"logits/chosen": -1.1146763563156128, |
|
"logits/rejected": -1.1514074802398682, |
|
"logps/chosen": -282.617431640625, |
|
"logps/rejected": -305.576416015625, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0780295133590698, |
|
"rewards/margins": 0.341577410697937, |
|
"rewards/rejected": -1.4196069240570068, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.7036247334754797, |
|
"grad_norm": 12.769747207532605, |
|
"learning_rate": 1.2232337628908103e-07, |
|
"logits/chosen": -1.005274772644043, |
|
"logits/rejected": -1.120625376701355, |
|
"logps/chosen": -270.090087890625, |
|
"logps/rejected": -300.65325927734375, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.084705114364624, |
|
"rewards/margins": 0.36065369844436646, |
|
"rewards/rejected": -1.4453589916229248, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7089552238805971, |
|
"grad_norm": 13.848401050251594, |
|
"learning_rate": 1.1834542222276206e-07, |
|
"logits/chosen": -1.1191794872283936, |
|
"logits/rejected": -1.2426977157592773, |
|
"logps/chosen": -271.416015625, |
|
"logps/rejected": -293.0040588378906, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0729753971099854, |
|
"rewards/margins": 0.35853153467178345, |
|
"rewards/rejected": -1.4315071105957031, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 15.508341962305703, |
|
"learning_rate": 1.1441306956834504e-07, |
|
"logits/chosen": -1.1978566646575928, |
|
"logits/rejected": -1.2869031429290771, |
|
"logps/chosen": -252.2648162841797, |
|
"logps/rejected": -265.5303649902344, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.888351559638977, |
|
"rewards/margins": 0.23599569499492645, |
|
"rewards/rejected": -1.1243473291397095, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7196162046908315, |
|
"grad_norm": 12.326647139033, |
|
"learning_rate": 1.1052768038126464e-07, |
|
"logits/chosen": -1.0239012241363525, |
|
"logits/rejected": -1.1444613933563232, |
|
"logps/chosen": -273.5757751464844, |
|
"logps/rejected": -296.2203369140625, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0173006057739258, |
|
"rewards/margins": 0.30182453989982605, |
|
"rewards/rejected": -1.3191251754760742, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.7249466950959488, |
|
"grad_norm": 18.71316397695756, |
|
"learning_rate": 1.0669060045014214e-07, |
|
"logits/chosen": -1.1475574970245361, |
|
"logits/rejected": -1.2453533411026, |
|
"logps/chosen": -267.3650207519531, |
|
"logps/rejected": -284.7320861816406, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.9687989354133606, |
|
"rewards/margins": 0.274304062128067, |
|
"rewards/rejected": -1.2431029081344604, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7302771855010661, |
|
"grad_norm": 20.228539694090426, |
|
"learning_rate": 1.0290315883064258e-07, |
|
"logits/chosen": -1.0727207660675049, |
|
"logits/rejected": -1.169166088104248, |
|
"logps/chosen": -234.42538452148438, |
|
"logps/rejected": -259.74664306640625, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.879969596862793, |
|
"rewards/margins": 0.3725913166999817, |
|
"rewards/rejected": -1.2525609731674194, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.7356076759061834, |
|
"grad_norm": 13.518054635862288, |
|
"learning_rate": 9.9166667385128e-08, |
|
"logits/chosen": -1.0578858852386475, |
|
"logits/rejected": -1.159432053565979, |
|
"logps/chosen": -266.2313232421875, |
|
"logps/rejected": -277.4366149902344, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0338330268859863, |
|
"rewards/margins": 0.32066407799720764, |
|
"rewards/rejected": -1.3544971942901611, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7409381663113006, |
|
"grad_norm": 13.588646234249182, |
|
"learning_rate": 9.54824203282647e-08, |
|
"logits/chosen": -1.1122428178787231, |
|
"logits/rejected": -1.1691913604736328, |
|
"logps/chosen": -292.1220703125, |
|
"logps/rejected": -314.33013916015625, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.232649564743042, |
|
"rewards/margins": 0.3411175310611725, |
|
"rewards/rejected": -1.573767066001892, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.746268656716418, |
|
"grad_norm": 13.39599747528393, |
|
"learning_rate": 9.185169377874488e-08, |
|
"logits/chosen": -1.0565317869186401, |
|
"logits/rejected": -1.104970932006836, |
|
"logps/chosen": -232.4084014892578, |
|
"logps/rejected": -271.18511962890625, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9469151496887207, |
|
"rewards/margins": 0.4982013702392578, |
|
"rewards/rejected": -1.445116639137268, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7515991471215352, |
|
"grad_norm": 12.55265938358736, |
|
"learning_rate": 8.827574531727452e-08, |
|
"logits/chosen": -1.085356593132019, |
|
"logits/rejected": -1.254529595375061, |
|
"logps/chosen": -234.07498168945312, |
|
"logps/rejected": -255.6736297607422, |
|
"loss": 0.5812, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9192997217178345, |
|
"rewards/margins": 0.39327552914619446, |
|
"rewards/rejected": -1.3125752210617065, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.7569296375266524, |
|
"grad_norm": 16.169497185007014, |
|
"learning_rate": 8.475581355098379e-08, |
|
"logits/chosen": -1.1205322742462158, |
|
"logits/rejected": -1.227104902267456, |
|
"logps/chosen": -254.82998657226562, |
|
"logps/rejected": -265.16961669921875, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9393894076347351, |
|
"rewards/margins": 0.3107382357120514, |
|
"rewards/rejected": -1.2501277923583984, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7622601279317697, |
|
"grad_norm": 12.716385801386352, |
|
"learning_rate": 8.129311768440807e-08, |
|
"logits/chosen": -1.0053701400756836, |
|
"logits/rejected": -1.096592903137207, |
|
"logps/chosen": -277.47540283203125, |
|
"logps/rejected": -304.0431823730469, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1990084648132324, |
|
"rewards/margins": 0.3271089792251587, |
|
"rewards/rejected": -1.5261173248291016, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 18.552637237088017, |
|
"learning_rate": 7.788885709719033e-08, |
|
"logits/chosen": -1.0947494506835938, |
|
"logits/rejected": -1.1496310234069824, |
|
"logps/chosen": -254.96630859375, |
|
"logps/rejected": -285.97308349609375, |
|
"loss": 0.6177, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0156748294830322, |
|
"rewards/margins": 0.3178596496582031, |
|
"rewards/rejected": -1.333534598350525, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7729211087420043, |
|
"grad_norm": 12.746321182622895, |
|
"learning_rate": 7.454421092865037e-08, |
|
"logits/chosen": -1.0280872583389282, |
|
"logits/rejected": -1.1245920658111572, |
|
"logps/chosen": -273.39501953125, |
|
"logps/rejected": -283.32110595703125, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.220465898513794, |
|
"rewards/margins": 0.208203986287117, |
|
"rewards/rejected": -1.4286696910858154, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.7782515991471215, |
|
"grad_norm": 11.876891342705504, |
|
"learning_rate": 7.126033766936365e-08, |
|
"logits/chosen": -1.1646153926849365, |
|
"logits/rejected": -1.2614471912384033, |
|
"logps/chosen": -267.27734375, |
|
"logps/rejected": -288.18731689453125, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.021458387374878, |
|
"rewards/margins": 0.3050800859928131, |
|
"rewards/rejected": -1.3265384435653687, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7835820895522388, |
|
"grad_norm": 13.39270512390593, |
|
"learning_rate": 6.80383747598938e-08, |
|
"logits/chosen": -1.1257355213165283, |
|
"logits/rejected": -1.1825156211853027, |
|
"logps/chosen": -269.59747314453125, |
|
"logps/rejected": -300.54266357421875, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0236284732818604, |
|
"rewards/margins": 0.42881250381469727, |
|
"rewards/rejected": -1.4524409770965576, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.7889125799573561, |
|
"grad_norm": 14.236670336488492, |
|
"learning_rate": 6.487943819681488e-08, |
|
"logits/chosen": -1.0966401100158691, |
|
"logits/rejected": -1.1666018962860107, |
|
"logps/chosen": -249.0513153076172, |
|
"logps/rejected": -277.46051025390625, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.015439748764038, |
|
"rewards/margins": 0.33569568395614624, |
|
"rewards/rejected": -1.3511353731155396, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7942430703624733, |
|
"grad_norm": 13.439712780756642, |
|
"learning_rate": 6.178462214616203e-08, |
|
"logits/chosen": -1.0600165128707886, |
|
"logits/rejected": -1.1609599590301514, |
|
"logps/chosen": -253.85018920898438, |
|
"logps/rejected": -286.5955810546875, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0086755752563477, |
|
"rewards/margins": 0.4383172392845154, |
|
"rewards/rejected": -1.4469928741455078, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7995735607675906, |
|
"grad_norm": 13.727935713255976, |
|
"learning_rate": 5.875499856444358e-08, |
|
"logits/chosen": -1.052286148071289, |
|
"logits/rejected": -1.177433967590332, |
|
"logps/chosen": -263.67333984375, |
|
"logps/rejected": -291.416015625, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.058933138847351, |
|
"rewards/margins": 0.31677955389022827, |
|
"rewards/rejected": -1.3757128715515137, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8049040511727079, |
|
"grad_norm": 13.5774716097425, |
|
"learning_rate": 5.5791616827345484e-08, |
|
"logits/chosen": -1.1035162210464478, |
|
"logits/rejected": -1.2241528034210205, |
|
"logps/chosen": -258.1141357421875, |
|
"logps/rejected": -289.6146545410156, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0831810235977173, |
|
"rewards/margins": 0.37278053164482117, |
|
"rewards/rejected": -1.4559617042541504, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.8102345415778252, |
|
"grad_norm": 13.872602372334944, |
|
"learning_rate": 5.289550336625731e-08, |
|
"logits/chosen": -0.967927098274231, |
|
"logits/rejected": -1.1444356441497803, |
|
"logps/chosen": -245.47329711914062, |
|
"logps/rejected": -270.84033203125, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0041565895080566, |
|
"rewards/margins": 0.34393635392189026, |
|
"rewards/rejected": -1.348093032836914, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8155650319829424, |
|
"grad_norm": 14.466168736500185, |
|
"learning_rate": 5.006766131274559e-08, |
|
"logits/chosen": -1.1071698665618896, |
|
"logits/rejected": -1.1825703382492065, |
|
"logps/chosen": -275.54388427734375, |
|
"logps/rejected": -296.87689208984375, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1156272888183594, |
|
"rewards/margins": 0.3059665858745575, |
|
"rewards/rejected": -1.4215937852859497, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.8208955223880597, |
|
"grad_norm": 15.26501051880337, |
|
"learning_rate": 4.730907015109759e-08, |
|
"logits/chosen": -1.037107229232788, |
|
"logits/rejected": -1.1411950588226318, |
|
"logps/chosen": -266.26123046875, |
|
"logps/rejected": -296.9294128417969, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0510127544403076, |
|
"rewards/margins": 0.46340426802635193, |
|
"rewards/rejected": -1.5144169330596924, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.826226012793177, |
|
"grad_norm": 10.911420515343652, |
|
"learning_rate": 4.4620685379055584e-08, |
|
"logits/chosen": -1.0774571895599365, |
|
"logits/rejected": -1.2212311029434204, |
|
"logps/chosen": -263.77813720703125, |
|
"logps/rejected": -281.2510070800781, |
|
"loss": 0.6032, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.093552589416504, |
|
"rewards/margins": 0.22536174952983856, |
|
"rewards/rejected": -1.318914532661438, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.8315565031982942, |
|
"grad_norm": 13.252940181725066, |
|
"learning_rate": 4.200343817685981e-08, |
|
"logits/chosen": -1.134172797203064, |
|
"logits/rejected": -1.157869577407837, |
|
"logps/chosen": -237.58920288085938, |
|
"logps/rejected": -262.5208435058594, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9115175008773804, |
|
"rewards/margins": 0.29485780000686646, |
|
"rewards/rejected": -1.2063753604888916, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8368869936034116, |
|
"grad_norm": 18.75722287778124, |
|
"learning_rate": 3.945823508471352e-08, |
|
"logits/chosen": -1.1293060779571533, |
|
"logits/rejected": -1.2253621816635132, |
|
"logps/chosen": -274.54595947265625, |
|
"logps/rejected": -298.9117126464844, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0663083791732788, |
|
"rewards/margins": 0.33463555574417114, |
|
"rewards/rejected": -1.4009437561035156, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.8422174840085288, |
|
"grad_norm": 12.55898038094129, |
|
"learning_rate": 3.698595768878363e-08, |
|
"logits/chosen": -1.0901148319244385, |
|
"logits/rejected": -1.2076427936553955, |
|
"logps/chosen": -242.2190704345703, |
|
"logps/rejected": -263.9744567871094, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9093745946884155, |
|
"rewards/margins": 0.4039868712425232, |
|
"rewards/rejected": -1.313361406326294, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.847547974413646, |
|
"grad_norm": 12.79776093588139, |
|
"learning_rate": 3.458746231584414e-08, |
|
"logits/chosen": -1.1291230916976929, |
|
"logits/rejected": -1.2340444326400757, |
|
"logps/chosen": -260.56195068359375, |
|
"logps/rejected": -304.3238525390625, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.992597222328186, |
|
"rewards/margins": 0.5544020533561707, |
|
"rewards/rejected": -1.5469990968704224, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 13.33658395118552, |
|
"learning_rate": 3.226357973666888e-08, |
|
"logits/chosen": -1.10861074924469, |
|
"logits/rejected": -1.2813326120376587, |
|
"logps/chosen": -228.31155395507812, |
|
"logps/rejected": -253.84207153320312, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8997095227241516, |
|
"rewards/margins": 0.36860379576683044, |
|
"rewards/rejected": -1.2683132886886597, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8528784648187633, |
|
"eval_logits/chosen": -1.526044487953186, |
|
"eval_logits/rejected": -1.4904903173446655, |
|
"eval_logps/chosen": -256.8968811035156, |
|
"eval_logps/rejected": -280.7786560058594, |
|
"eval_loss": 0.6389869451522827, |
|
"eval_rewards/accuracies": 0.6056910753250122, |
|
"eval_rewards/chosen": -1.0531564950942993, |
|
"eval_rewards/margins": 0.25055956840515137, |
|
"eval_rewards/rejected": -1.3037161827087402, |
|
"eval_runtime": 165.6574, |
|
"eval_samples_per_second": 11.844, |
|
"eval_steps_per_second": 1.485, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8582089552238806, |
|
"grad_norm": 13.707316978636483, |
|
"learning_rate": 3.001511487827582e-08, |
|
"logits/chosen": -1.117619514465332, |
|
"logits/rejected": -1.1415525674819946, |
|
"logps/chosen": -267.25225830078125, |
|
"logps/rejected": -302.49298095703125, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0340323448181152, |
|
"rewards/margins": 0.3930579125881195, |
|
"rewards/rejected": -1.4270904064178467, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.8635394456289979, |
|
"grad_norm": 16.362755169612413, |
|
"learning_rate": 2.7842846545123505e-08, |
|
"logits/chosen": -1.0309226512908936, |
|
"logits/rejected": -1.1349594593048096, |
|
"logps/chosen": -256.92706298828125, |
|
"logps/rejected": -267.79986572265625, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9786995649337769, |
|
"rewards/margins": 0.1820269525051117, |
|
"rewards/rejected": -1.160726547241211, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8688699360341151, |
|
"grad_norm": 13.309838462940968, |
|
"learning_rate": 2.5747527149355018e-08, |
|
"logits/chosen": -1.1667518615722656, |
|
"logits/rejected": -1.2266581058502197, |
|
"logps/chosen": -267.74517822265625, |
|
"logps/rejected": -306.21563720703125, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.044034719467163, |
|
"rewards/margins": 0.49896711111068726, |
|
"rewards/rejected": -1.5430018901824951, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.8742004264392325, |
|
"grad_norm": 12.846376086365586, |
|
"learning_rate": 2.372988245018401e-08, |
|
"logits/chosen": -1.05556321144104, |
|
"logits/rejected": -1.1795189380645752, |
|
"logps/chosen": -259.63458251953125, |
|
"logps/rejected": -298.86761474609375, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0314178466796875, |
|
"rewards/margins": 0.522697925567627, |
|
"rewards/rejected": -1.5541157722473145, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8795309168443497, |
|
"grad_norm": 13.824707878509736, |
|
"learning_rate": 2.1790611302512114e-08, |
|
"logits/chosen": -1.1069999933242798, |
|
"logits/rejected": -1.1457974910736084, |
|
"logps/chosen": -283.31170654296875, |
|
"logps/rejected": -302.56658935546875, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.16603684425354, |
|
"rewards/margins": 0.2864134609699249, |
|
"rewards/rejected": -1.4524505138397217, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.8848614072494669, |
|
"grad_norm": 15.28607581049521, |
|
"learning_rate": 1.9930385414865386e-08, |
|
"logits/chosen": -1.0714858770370483, |
|
"logits/rejected": -1.1165021657943726, |
|
"logps/chosen": -269.0600280761719, |
|
"logps/rejected": -298.10711669921875, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1144940853118896, |
|
"rewards/margins": 0.3277047276496887, |
|
"rewards/rejected": -1.4421989917755127, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8901918976545842, |
|
"grad_norm": 16.521551762750804, |
|
"learning_rate": 1.8149849116733672e-08, |
|
"logits/chosen": -1.0863420963287354, |
|
"logits/rejected": -1.1994072198867798, |
|
"logps/chosen": -260.0115661621094, |
|
"logps/rejected": -284.62774658203125, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9503141641616821, |
|
"rewards/margins": 0.32754647731781006, |
|
"rewards/rejected": -1.2778605222702026, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.8955223880597015, |
|
"grad_norm": 18.673626539883045, |
|
"learning_rate": 1.6449619135393084e-08, |
|
"logits/chosen": -1.0423157215118408, |
|
"logits/rejected": -1.1916964054107666, |
|
"logps/chosen": -263.9278564453125, |
|
"logps/rejected": -286.19940185546875, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0104882717132568, |
|
"rewards/margins": 0.30736953020095825, |
|
"rewards/rejected": -1.3178579807281494, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.9008528784648188, |
|
"grad_norm": 11.823082792807151, |
|
"learning_rate": 1.4830284382289144e-08, |
|
"logits/chosen": -1.114751935005188, |
|
"logits/rejected": -1.1412584781646729, |
|
"logps/chosen": -269.94757080078125, |
|
"logps/rejected": -289.11083984375, |
|
"loss": 0.5786, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1332142353057861, |
|
"rewards/margins": 0.24711325764656067, |
|
"rewards/rejected": -1.3803274631500244, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.906183368869936, |
|
"grad_norm": 14.344497159976747, |
|
"learning_rate": 1.329240574905452e-08, |
|
"logits/chosen": -1.1762893199920654, |
|
"logits/rejected": -1.265937328338623, |
|
"logps/chosen": -289.95892333984375, |
|
"logps/rejected": -309.7876281738281, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0822083950042725, |
|
"rewards/margins": 0.27944907546043396, |
|
"rewards/rejected": -1.3616573810577393, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9115138592750534, |
|
"grad_norm": 18.008488344780044, |
|
"learning_rate": 1.1836515913232175e-08, |
|
"logits/chosen": -1.1288697719573975, |
|
"logits/rejected": -1.3069543838500977, |
|
"logps/chosen": -264.71087646484375, |
|
"logps/rejected": -277.7538757324219, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0512837171554565, |
|
"rewards/margins": 0.2792138457298279, |
|
"rewards/rejected": -1.3304975032806396, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.9168443496801706, |
|
"grad_norm": 14.381654064223152, |
|
"learning_rate": 1.0463119153770989e-08, |
|
"logits/chosen": -1.143795132637024, |
|
"logits/rejected": -1.2443573474884033, |
|
"logps/chosen": -259.2528076171875, |
|
"logps/rejected": -280.7781066894531, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0562851428985596, |
|
"rewards/margins": 0.3439714312553406, |
|
"rewards/rejected": -1.4002567529678345, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9221748400852878, |
|
"grad_norm": 18.214838134208975, |
|
"learning_rate": 9.172691176357633e-09, |
|
"logits/chosen": -1.1904518604278564, |
|
"logits/rejected": -1.3172063827514648, |
|
"logps/chosen": -243.25830078125, |
|
"logps/rejected": -253.4103240966797, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9157142639160156, |
|
"rewards/margins": 0.22436395287513733, |
|
"rewards/rejected": -1.1400783061981201, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.9275053304904051, |
|
"grad_norm": 13.634453891392663, |
|
"learning_rate": 7.965678948645832e-09, |
|
"logits/chosen": -1.1409590244293213, |
|
"logits/rejected": -1.193704605102539, |
|
"logps/chosen": -290.54986572265625, |
|
"logps/rejected": -311.5206604003906, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1318142414093018, |
|
"rewards/margins": 0.27807727456092834, |
|
"rewards/rejected": -1.4098914861679077, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9328358208955224, |
|
"grad_norm": 16.785891539902277, |
|
"learning_rate": 6.842500545439278e-09, |
|
"logits/chosen": -1.189774751663208, |
|
"logits/rejected": -1.1961729526519775, |
|
"logps/chosen": -282.62677001953125, |
|
"logps/rejected": -313.8055419921875, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.145236611366272, |
|
"rewards/margins": 0.3282146751880646, |
|
"rewards/rejected": -1.4734513759613037, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 12.489851575915692, |
|
"learning_rate": 5.803545003882554e-09, |
|
"logits/chosen": -1.0940172672271729, |
|
"logits/rejected": -1.2261667251586914, |
|
"logps/chosen": -262.1616516113281, |
|
"logps/rejected": -284.6984558105469, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0160671472549438, |
|
"rewards/margins": 0.3600946366786957, |
|
"rewards/rejected": -1.376161813735962, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9434968017057569, |
|
"grad_norm": 13.817135678609631, |
|
"learning_rate": 4.849172188709588e-09, |
|
"logits/chosen": -1.124348521232605, |
|
"logits/rejected": -1.223716139793396, |
|
"logps/chosen": -275.583251953125, |
|
"logps/rejected": -284.2818298339844, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.0598669052124023, |
|
"rewards/margins": 0.21211442351341248, |
|
"rewards/rejected": -1.2719814777374268, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.9488272921108742, |
|
"grad_norm": 17.35628545613914, |
|
"learning_rate": 3.979712667596669e-09, |
|
"logits/chosen": -1.0675632953643799, |
|
"logits/rejected": -1.1845059394836426, |
|
"logps/chosen": -253.7633819580078, |
|
"logps/rejected": -275.1048278808594, |
|
"loss": 0.5955, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.99676513671875, |
|
"rewards/margins": 0.29226452112197876, |
|
"rewards/rejected": -1.2890297174453735, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9541577825159915, |
|
"grad_norm": 16.142121068942174, |
|
"learning_rate": 3.195467596663254e-09, |
|
"logits/chosen": -1.131365180015564, |
|
"logits/rejected": -1.242356538772583, |
|
"logps/chosen": -240.4732208251953, |
|
"logps/rejected": -275.12884521484375, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9128969311714172, |
|
"rewards/margins": 0.4684675335884094, |
|
"rewards/rejected": -1.3813644647598267, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.9594882729211087, |
|
"grad_norm": 15.867103975451991, |
|
"learning_rate": 2.4967086161600814e-09, |
|
"logits/chosen": -1.082676649093628, |
|
"logits/rejected": -1.2301782369613647, |
|
"logps/chosen": -251.3439483642578, |
|
"logps/rejected": -259.95037841796875, |
|
"loss": 0.6056, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.056687355041504, |
|
"rewards/margins": 0.2243305891752243, |
|
"rewards/rejected": -1.2810180187225342, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.964818763326226, |
|
"grad_norm": 14.494532565146372, |
|
"learning_rate": 1.8836777563805416e-09, |
|
"logits/chosen": -1.1500489711761475, |
|
"logits/rejected": -1.267773151397705, |
|
"logps/chosen": -262.1219177246094, |
|
"logps/rejected": -284.3946228027344, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9880873560905457, |
|
"rewards/margins": 0.3360677659511566, |
|
"rewards/rejected": -1.3241552114486694, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.9701492537313433, |
|
"grad_norm": 13.7532663379059, |
|
"learning_rate": 1.3565873538283757e-09, |
|
"logits/chosen": -1.0888932943344116, |
|
"logits/rejected": -1.2862221002578735, |
|
"logps/chosen": -281.6429443359375, |
|
"logps/rejected": -287.7558898925781, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1525847911834717, |
|
"rewards/margins": 0.23554334044456482, |
|
"rewards/rejected": -1.3881282806396484, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9754797441364605, |
|
"grad_norm": 18.11310668134442, |
|
"learning_rate": 9.156199776702567e-10, |
|
"logits/chosen": -1.2114653587341309, |
|
"logits/rejected": -1.2886050939559937, |
|
"logps/chosen": -277.15283203125, |
|
"logps/rejected": -297.81988525390625, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1020017862319946, |
|
"rewards/margins": 0.3052862286567688, |
|
"rewards/rejected": -1.4072880744934082, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.9808102345415778, |
|
"grad_norm": 16.38268266725161, |
|
"learning_rate": 5.609283664990693e-10, |
|
"logits/chosen": -1.1473147869110107, |
|
"logits/rejected": -1.2061867713928223, |
|
"logps/chosen": -269.15692138671875, |
|
"logps/rejected": -297.3847351074219, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0159175395965576, |
|
"rewards/margins": 0.310077965259552, |
|
"rewards/rejected": -1.3259953260421753, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9861407249466951, |
|
"grad_norm": 15.464029236617364, |
|
"learning_rate": 2.926353754295896e-10, |
|
"logits/chosen": -1.2007001638412476, |
|
"logits/rejected": -1.333519458770752, |
|
"logps/chosen": -266.4494934082031, |
|
"logps/rejected": -296.6923522949219, |
|
"loss": 0.5848, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0838963985443115, |
|
"rewards/margins": 0.3357781171798706, |
|
"rewards/rejected": -1.4196745157241821, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.9914712153518124, |
|
"grad_norm": 11.785459063580232, |
|
"learning_rate": 1.1083393354488491e-10, |
|
"logits/chosen": -1.0910792350769043, |
|
"logits/rejected": -1.14475417137146, |
|
"logps/chosen": -275.3228454589844, |
|
"logps/rejected": -302.8422546386719, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1003937721252441, |
|
"rewards/margins": 0.42316898703575134, |
|
"rewards/rejected": -1.5235626697540283, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9968017057569296, |
|
"grad_norm": 12.18026896660238, |
|
"learning_rate": 1.5587011708340092e-11, |
|
"logits/chosen": -1.0970802307128906, |
|
"logits/rejected": -1.167004108428955, |
|
"logps/chosen": -295.5314636230469, |
|
"logps/rejected": -330.5072937011719, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1293576955795288, |
|
"rewards/margins": 0.5504059791564941, |
|
"rewards/rejected": -1.6797635555267334, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 938, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6256769998495513, |
|
"train_runtime": 22377.6313, |
|
"train_samples_per_second": 2.683, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 938, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |