|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 11.290426517034053, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": 1.1020113229751587, |
|
"logits/rejected": 1.165069818496704, |
|
"logps/chosen": -165.85887145996094, |
|
"logps/rejected": -178.0421905517578, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 12.642674037441155, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": 0.9179312586784363, |
|
"logits/rejected": 0.8261909484863281, |
|
"logps/chosen": -410.791015625, |
|
"logps/rejected": -328.3950500488281, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": -0.00014805687533225864, |
|
"rewards/margins": 0.0001381068432237953, |
|
"rewards/rejected": -0.00028616367490030825, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 12.791180740303325, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": 0.9256755709648132, |
|
"logits/rejected": 0.9386581182479858, |
|
"logps/chosen": -278.5252380371094, |
|
"logps/rejected": -242.84823608398438, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0002471913758199662, |
|
"rewards/margins": -0.0008500738185830414, |
|
"rewards/rejected": 0.000602882297243923, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 11.807415189137759, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": 0.7909670472145081, |
|
"logits/rejected": 0.7317233681678772, |
|
"logps/chosen": -273.0486755371094, |
|
"logps/rejected": -276.9691162109375, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.00020443140238057822, |
|
"rewards/margins": -0.0006267547723837197, |
|
"rewards/rejected": 0.0004223231808282435, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 11.404068374494468, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": 0.8638267517089844, |
|
"logits/rejected": 0.8337821960449219, |
|
"logps/chosen": -277.4988098144531, |
|
"logps/rejected": -265.49273681640625, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0007294538663700223, |
|
"rewards/margins": 0.0005479886895045638, |
|
"rewards/rejected": 0.0001814651332097128, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 13.063518944634659, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": 0.8905746340751648, |
|
"logits/rejected": 0.8993768692016602, |
|
"logps/chosen": -301.5456848144531, |
|
"logps/rejected": -256.28350830078125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.0004059553029946983, |
|
"rewards/margins": -0.0005866002757102251, |
|
"rewards/rejected": 0.0009925555204972625, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 12.882875429409548, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": 0.8007913827896118, |
|
"logits/rejected": 0.8331505060195923, |
|
"logps/chosen": -318.5068054199219, |
|
"logps/rejected": -284.26190185546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.00027945329202339053, |
|
"rewards/margins": -0.0007258501136675477, |
|
"rewards/rejected": 0.0004463967925403267, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 11.434777428083084, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": 0.8525362014770508, |
|
"logits/rejected": 0.8576486706733704, |
|
"logps/chosen": -307.0369567871094, |
|
"logps/rejected": -293.033447265625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00017868136637844145, |
|
"rewards/margins": -0.0003002768207807094, |
|
"rewards/rejected": 0.00047895809984765947, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 12.302340768468458, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": 0.7650710344314575, |
|
"logits/rejected": 0.9396867752075195, |
|
"logps/chosen": -229.7215118408203, |
|
"logps/rejected": -267.52130126953125, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.000520746223628521, |
|
"rewards/margins": 0.00032199383713304996, |
|
"rewards/rejected": 0.00019875231373589486, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 12.033324705457542, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": 0.8275500535964966, |
|
"logits/rejected": 0.7720881700515747, |
|
"logps/chosen": -367.6588439941406, |
|
"logps/rejected": -316.90289306640625, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0005487919552251697, |
|
"rewards/margins": -0.00021695331088267267, |
|
"rewards/rejected": -0.000331838644342497, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 12.472143691246368, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": 0.9897788166999817, |
|
"logits/rejected": 1.1146961450576782, |
|
"logps/chosen": -287.7003173828125, |
|
"logps/rejected": -300.0032958984375, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0009165392257273197, |
|
"rewards/margins": 4.667566463467665e-05, |
|
"rewards/rejected": -0.0009632149012759328, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 10.47566701912818, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": 0.862777590751648, |
|
"logits/rejected": 0.8474942445755005, |
|
"logps/chosen": -255.17343139648438, |
|
"logps/rejected": -246.80679321289062, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0007340327138081193, |
|
"rewards/margins": 9.011449583340436e-05, |
|
"rewards/rejected": -0.0008241472532972693, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 11.729418421123865, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": 0.8798221349716187, |
|
"logits/rejected": 0.8902707099914551, |
|
"logps/chosen": -331.7406311035156, |
|
"logps/rejected": -299.10302734375, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0016034040600061417, |
|
"rewards/margins": 0.0008947736350819468, |
|
"rewards/rejected": -0.002498177345842123, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 10.906048267354812, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": 0.8031892776489258, |
|
"logits/rejected": 0.8155500292778015, |
|
"logps/chosen": -240.92489624023438, |
|
"logps/rejected": -324.6410217285156, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0009776324732229114, |
|
"rewards/margins": 0.0024681445211172104, |
|
"rewards/rejected": -0.0034457766450941563, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 12.066753821066918, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": 0.9222855567932129, |
|
"logits/rejected": 0.8834908604621887, |
|
"logps/chosen": -314.27435302734375, |
|
"logps/rejected": -301.2237854003906, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.003638929221779108, |
|
"rewards/margins": 0.0016478378092870116, |
|
"rewards/rejected": -0.0052867671474814415, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 12.461323392055261, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": 0.8271953463554382, |
|
"logits/rejected": 0.8963567614555359, |
|
"logps/chosen": -255.406005859375, |
|
"logps/rejected": -246.0330352783203, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.003589408006519079, |
|
"rewards/margins": 0.003603331744670868, |
|
"rewards/rejected": -0.007192739751189947, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 12.837513374911397, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": 0.8090554475784302, |
|
"logits/rejected": 0.7438990473747253, |
|
"logps/chosen": -327.182373046875, |
|
"logps/rejected": -251.8794708251953, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.003899016883224249, |
|
"rewards/margins": 0.0035891197621822357, |
|
"rewards/rejected": -0.007488137576729059, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 11.693960987426166, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": 0.9418096542358398, |
|
"logits/rejected": 0.9179880023002625, |
|
"logps/chosen": -373.8608093261719, |
|
"logps/rejected": -364.60565185546875, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.008983137086033821, |
|
"rewards/margins": 0.0037296731024980545, |
|
"rewards/rejected": -0.012712809257209301, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 13.093807399181138, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": 0.7935225367546082, |
|
"logits/rejected": 0.8044607043266296, |
|
"logps/chosen": -255.5353546142578, |
|
"logps/rejected": -246.3568115234375, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.006067480426281691, |
|
"rewards/margins": 0.00562057550996542, |
|
"rewards/rejected": -0.011688055470585823, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 14.283584623097973, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": 0.8921982645988464, |
|
"logits/rejected": 0.875672459602356, |
|
"logps/chosen": -337.9564514160156, |
|
"logps/rejected": -272.2162170410156, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.008288905955851078, |
|
"rewards/margins": 0.007777436636388302, |
|
"rewards/rejected": -0.01606634259223938, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 10.889194335114492, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": 0.7922821640968323, |
|
"logits/rejected": 0.8541098833084106, |
|
"logps/chosen": -336.982177734375, |
|
"logps/rejected": -318.88372802734375, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.008277793414890766, |
|
"rewards/margins": 0.013390962965786457, |
|
"rewards/rejected": -0.021668758243322372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 12.286406041336088, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": 0.7591502070426941, |
|
"logits/rejected": 0.7965251803398132, |
|
"logps/chosen": -256.0464172363281, |
|
"logps/rejected": -279.0765380859375, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.015102909877896309, |
|
"rewards/margins": 0.008513028733432293, |
|
"rewards/rejected": -0.023615941405296326, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 11.650837722032147, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": 0.8363003730773926, |
|
"logits/rejected": 0.8418310880661011, |
|
"logps/chosen": -341.4637756347656, |
|
"logps/rejected": -334.71722412109375, |
|
"loss": 0.6873, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.021802758798003197, |
|
"rewards/margins": 0.012052404694259167, |
|
"rewards/rejected": -0.03385516256093979, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 12.17094186601859, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": 0.9586321711540222, |
|
"logits/rejected": 1.0120365619659424, |
|
"logps/chosen": -258.8882141113281, |
|
"logps/rejected": -295.0778503417969, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.021029580384492874, |
|
"rewards/margins": 0.024138668552041054, |
|
"rewards/rejected": -0.04516825079917908, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 13.555388385039116, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": 0.8616135716438293, |
|
"logits/rejected": 0.860598087310791, |
|
"logps/chosen": -327.2540588378906, |
|
"logps/rejected": -277.73858642578125, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.025776376947760582, |
|
"rewards/margins": 0.029360895976424217, |
|
"rewards/rejected": -0.0551372691988945, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 12.974446675999463, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": 0.9154967069625854, |
|
"logits/rejected": 0.953376293182373, |
|
"logps/chosen": -261.62432861328125, |
|
"logps/rejected": -251.11181640625, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.035514116287231445, |
|
"rewards/margins": 0.020228754729032516, |
|
"rewards/rejected": -0.05574287101626396, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 12.083196573389626, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": 0.7959989309310913, |
|
"logits/rejected": 0.9321529269218445, |
|
"logps/chosen": -291.46197509765625, |
|
"logps/rejected": -285.123291015625, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.039493151009082794, |
|
"rewards/margins": 0.040632762014865875, |
|
"rewards/rejected": -0.08012591302394867, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 12.75510090459433, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": 0.9645400047302246, |
|
"logits/rejected": 0.8777379989624023, |
|
"logps/chosen": -312.07080078125, |
|
"logps/rejected": -250.50857543945312, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.04036339372396469, |
|
"rewards/margins": 0.02411934733390808, |
|
"rewards/rejected": -0.06448274105787277, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 13.510481134396441, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": 0.8545993566513062, |
|
"logits/rejected": 0.9058882594108582, |
|
"logps/chosen": -325.89508056640625, |
|
"logps/rejected": -312.71478271484375, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06212998181581497, |
|
"rewards/margins": 0.05864911153912544, |
|
"rewards/rejected": -0.12077908217906952, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 11.640828173777832, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": 0.9046756029129028, |
|
"logits/rejected": 0.9748743176460266, |
|
"logps/chosen": -317.8568420410156, |
|
"logps/rejected": -289.9772033691406, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.08539162576198578, |
|
"rewards/margins": 0.06998683512210846, |
|
"rewards/rejected": -0.15537847578525543, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 13.967496487157206, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": 0.8389209508895874, |
|
"logits/rejected": 0.7966829538345337, |
|
"logps/chosen": -292.8455505371094, |
|
"logps/rejected": -308.762451171875, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12145288288593292, |
|
"rewards/margins": 0.07226552069187164, |
|
"rewards/rejected": -0.19371840357780457, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 11.85245858072109, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": 0.9054175615310669, |
|
"logits/rejected": 0.8772112727165222, |
|
"logps/chosen": -306.7538146972656, |
|
"logps/rejected": -262.8247985839844, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13142530620098114, |
|
"rewards/margins": 0.0997721403837204, |
|
"rewards/rejected": -0.23119744658470154, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 11.983652317145598, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": 0.8777133226394653, |
|
"logits/rejected": 0.880419909954071, |
|
"logps/chosen": -313.6241455078125, |
|
"logps/rejected": -279.3583984375, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1642109900712967, |
|
"rewards/margins": 0.06335286051034927, |
|
"rewards/rejected": -0.22756382822990417, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 12.597865260643777, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": 0.8802727460861206, |
|
"logits/rejected": 0.9690290689468384, |
|
"logps/chosen": -291.7769470214844, |
|
"logps/rejected": -340.9366455078125, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.2159583568572998, |
|
"rewards/margins": 0.12743426859378815, |
|
"rewards/rejected": -0.34339264035224915, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 12.031574034409454, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": 1.0568187236785889, |
|
"logits/rejected": 0.9909990429878235, |
|
"logps/chosen": -276.7728576660156, |
|
"logps/rejected": -308.56048583984375, |
|
"loss": 0.6486, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18150362372398376, |
|
"rewards/margins": 0.15542708337306976, |
|
"rewards/rejected": -0.33693069219589233, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 14.09254079956678, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": 0.7211519479751587, |
|
"logits/rejected": 0.715661883354187, |
|
"logps/chosen": -328.1579284667969, |
|
"logps/rejected": -335.1773986816406, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.2924322783946991, |
|
"rewards/margins": 0.11239685118198395, |
|
"rewards/rejected": -0.40482911467552185, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 14.373438087738135, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": 1.0743309259414673, |
|
"logits/rejected": 0.9853240251541138, |
|
"logps/chosen": -320.19158935546875, |
|
"logps/rejected": -270.3163146972656, |
|
"loss": 0.6509, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3367859423160553, |
|
"rewards/margins": 0.08087924122810364, |
|
"rewards/rejected": -0.41766518354415894, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 14.669348649143826, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": 0.9024693369865417, |
|
"logits/rejected": 1.007645606994629, |
|
"logps/chosen": -297.4977111816406, |
|
"logps/rejected": -319.9523010253906, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3957563638687134, |
|
"rewards/margins": 0.1617899388074875, |
|
"rewards/rejected": -0.5575462579727173, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 12.430586958233993, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": 0.7547154426574707, |
|
"logits/rejected": 0.9096601605415344, |
|
"logps/chosen": -291.98260498046875, |
|
"logps/rejected": -299.932861328125, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.26660218834877014, |
|
"rewards/margins": 0.16147901117801666, |
|
"rewards/rejected": -0.42808112502098083, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 15.743603343827022, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": 0.9608089327812195, |
|
"logits/rejected": 1.006642460823059, |
|
"logps/chosen": -337.75048828125, |
|
"logps/rejected": -310.911376953125, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4503099322319031, |
|
"rewards/margins": 0.10561531782150269, |
|
"rewards/rejected": -0.5559252500534058, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 13.296795216212011, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": 0.9124090075492859, |
|
"logits/rejected": 0.9337595701217651, |
|
"logps/chosen": -335.5029296875, |
|
"logps/rejected": -336.509033203125, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5172705054283142, |
|
"rewards/margins": 0.24937620759010315, |
|
"rewards/rejected": -0.7666467428207397, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 16.087465846751012, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": 0.77690190076828, |
|
"logits/rejected": 0.8211329579353333, |
|
"logps/chosen": -319.15301513671875, |
|
"logps/rejected": -365.4151611328125, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5081580877304077, |
|
"rewards/margins": 0.23018617928028107, |
|
"rewards/rejected": -0.7383443117141724, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 17.466087793122092, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": 0.9634016752243042, |
|
"logits/rejected": 1.0442949533462524, |
|
"logps/chosen": -432.4740295410156, |
|
"logps/rejected": -414.32666015625, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6796163320541382, |
|
"rewards/margins": 0.297426700592041, |
|
"rewards/rejected": -0.9770429730415344, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 17.06585616280862, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": 0.8025485277175903, |
|
"logits/rejected": 0.9477583169937134, |
|
"logps/chosen": -335.64129638671875, |
|
"logps/rejected": -348.13201904296875, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.5576916933059692, |
|
"rewards/margins": 0.2742641270160675, |
|
"rewards/rejected": -0.8319557905197144, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 14.864309504803636, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": 0.8711138963699341, |
|
"logits/rejected": 0.8473278880119324, |
|
"logps/chosen": -354.7337646484375, |
|
"logps/rejected": -355.3828125, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6624523401260376, |
|
"rewards/margins": 0.12896910309791565, |
|
"rewards/rejected": -0.7914214134216309, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 14.807426790938989, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": 0.7770389318466187, |
|
"logits/rejected": 0.7765820622444153, |
|
"logps/chosen": -323.10968017578125, |
|
"logps/rejected": -317.7047119140625, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6039828062057495, |
|
"rewards/margins": 0.15776701271533966, |
|
"rewards/rejected": -0.761749804019928, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 15.367304865931045, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": 0.880317211151123, |
|
"logits/rejected": 0.842047393321991, |
|
"logps/chosen": -346.06866455078125, |
|
"logps/rejected": -363.48333740234375, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.40573984384536743, |
|
"rewards/margins": 0.29732540249824524, |
|
"rewards/rejected": -0.7030652165412903, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 15.949013687358022, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": 0.7507516741752625, |
|
"logits/rejected": 0.8210875391960144, |
|
"logps/chosen": -288.9540710449219, |
|
"logps/rejected": -307.3976135253906, |
|
"loss": 0.602, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.48658061027526855, |
|
"rewards/margins": 0.1853361576795578, |
|
"rewards/rejected": -0.671916663646698, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 16.9694155509272, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": 0.7397254109382629, |
|
"logits/rejected": 0.9247487187385559, |
|
"logps/chosen": -316.98785400390625, |
|
"logps/rejected": -398.54302978515625, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.48844990134239197, |
|
"rewards/margins": 0.33046063780784607, |
|
"rewards/rejected": -0.8189105987548828, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 15.78316684748212, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": 0.8742391467094421, |
|
"logits/rejected": 0.8716489672660828, |
|
"logps/chosen": -293.3537292480469, |
|
"logps/rejected": -325.4706726074219, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3982633650302887, |
|
"rewards/margins": 0.4348866045475006, |
|
"rewards/rejected": -0.8331500291824341, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 17.430261021826254, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": 0.8817283511161804, |
|
"logits/rejected": 0.8894240260124207, |
|
"logps/chosen": -379.4447326660156, |
|
"logps/rejected": -356.8714599609375, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7444159388542175, |
|
"rewards/margins": 0.18644893169403076, |
|
"rewards/rejected": -0.9308649301528931, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 15.352342487594235, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": 0.7498997449874878, |
|
"logits/rejected": 0.8734132647514343, |
|
"logps/chosen": -334.142822265625, |
|
"logps/rejected": -378.78839111328125, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5240365862846375, |
|
"rewards/margins": 0.5625481605529785, |
|
"rewards/rejected": -1.0865848064422607, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 17.82749679734051, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": 0.7966002225875854, |
|
"logits/rejected": 0.7500957250595093, |
|
"logps/chosen": -385.6920166015625, |
|
"logps/rejected": -407.591552734375, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7459064722061157, |
|
"rewards/margins": 0.31928759813308716, |
|
"rewards/rejected": -1.0651941299438477, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 16.784735945624263, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": 0.6152772903442383, |
|
"logits/rejected": 0.7151674032211304, |
|
"logps/chosen": -299.0379943847656, |
|
"logps/rejected": -367.5239562988281, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5808143615722656, |
|
"rewards/margins": 0.37133264541625977, |
|
"rewards/rejected": -0.9521470069885254, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 15.174838695691413, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": 0.6313377618789673, |
|
"logits/rejected": 0.6896692514419556, |
|
"logps/chosen": -331.22296142578125, |
|
"logps/rejected": -373.2359313964844, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5576273798942566, |
|
"rewards/margins": 0.5619553327560425, |
|
"rewards/rejected": -1.1195827722549438, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 14.111267602676048, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": 0.7313958406448364, |
|
"logits/rejected": 0.6984618902206421, |
|
"logps/chosen": -400.5677795410156, |
|
"logps/rejected": -427.5818786621094, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7098091840744019, |
|
"rewards/margins": 0.32652807235717773, |
|
"rewards/rejected": -1.03633713722229, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 16.422136951074723, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": 0.7890074253082275, |
|
"logits/rejected": 0.697285532951355, |
|
"logps/chosen": -271.3017272949219, |
|
"logps/rejected": -260.1424865722656, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.5347421169281006, |
|
"rewards/margins": 0.14706984162330627, |
|
"rewards/rejected": -0.6818119287490845, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 18.921421716557443, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": 0.6331374049186707, |
|
"logits/rejected": 0.7241517305374146, |
|
"logps/chosen": -383.59393310546875, |
|
"logps/rejected": -467.81591796875, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7832542061805725, |
|
"rewards/margins": 0.5469503402709961, |
|
"rewards/rejected": -1.330204725265503, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 22.144464177364856, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": 0.6505743265151978, |
|
"logits/rejected": 0.7051428556442261, |
|
"logps/chosen": -349.1499938964844, |
|
"logps/rejected": -378.1747131347656, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6620784401893616, |
|
"rewards/margins": 0.4197353422641754, |
|
"rewards/rejected": -1.0818138122558594, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 21.853395092107863, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": 0.7972511053085327, |
|
"logits/rejected": 0.7657841444015503, |
|
"logps/chosen": -327.83294677734375, |
|
"logps/rejected": -362.2701721191406, |
|
"loss": 0.6311, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.864510715007782, |
|
"rewards/margins": 0.39511948823928833, |
|
"rewards/rejected": -1.2596302032470703, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 17.131174415142638, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": 0.7822259068489075, |
|
"logits/rejected": 0.7136083841323853, |
|
"logps/chosen": -390.30084228515625, |
|
"logps/rejected": -380.1099548339844, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6453007459640503, |
|
"rewards/margins": 0.44277459383010864, |
|
"rewards/rejected": -1.0880753993988037, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 17.663633787684425, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": 0.5881372690200806, |
|
"logits/rejected": 0.6206382513046265, |
|
"logps/chosen": -356.79901123046875, |
|
"logps/rejected": -371.9761047363281, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6707444787025452, |
|
"rewards/margins": 0.4131700396537781, |
|
"rewards/rejected": -1.0839145183563232, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 18.054692834695842, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": 0.8582113981246948, |
|
"logits/rejected": 0.7811607122421265, |
|
"logps/chosen": -460.695556640625, |
|
"logps/rejected": -398.85345458984375, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8490235209465027, |
|
"rewards/margins": 0.4152224659919739, |
|
"rewards/rejected": -1.2642459869384766, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 16.657440247472014, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": 0.7700496912002563, |
|
"logits/rejected": 0.8080910444259644, |
|
"logps/chosen": -296.64111328125, |
|
"logps/rejected": -339.17388916015625, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.550514817237854, |
|
"rewards/margins": 0.4359719753265381, |
|
"rewards/rejected": -0.9864869117736816, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 18.818954073381697, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": 0.7167537212371826, |
|
"logits/rejected": 0.7465007305145264, |
|
"logps/chosen": -320.0273132324219, |
|
"logps/rejected": -379.2918701171875, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6476621627807617, |
|
"rewards/margins": 0.5033532977104187, |
|
"rewards/rejected": -1.1510154008865356, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 19.432326737759745, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": 0.685808002948761, |
|
"logits/rejected": 0.748884916305542, |
|
"logps/chosen": -381.24188232421875, |
|
"logps/rejected": -390.5279846191406, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.847141444683075, |
|
"rewards/margins": 0.24964895844459534, |
|
"rewards/rejected": -1.0967904329299927, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 15.565167912174749, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": 0.579333484172821, |
|
"logits/rejected": 0.6070585250854492, |
|
"logps/chosen": -319.1553955078125, |
|
"logps/rejected": -362.86920166015625, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6768738627433777, |
|
"rewards/margins": 0.6027628779411316, |
|
"rewards/rejected": -1.2796367406845093, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 19.32420588879935, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": 0.5981010794639587, |
|
"logits/rejected": 0.626388669013977, |
|
"logps/chosen": -407.26708984375, |
|
"logps/rejected": -400.2440490722656, |
|
"loss": 0.6386, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.66740882396698, |
|
"rewards/margins": 0.18918684124946594, |
|
"rewards/rejected": -0.8565956354141235, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 15.975618010502641, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": 0.6756628751754761, |
|
"logits/rejected": 0.6775110960006714, |
|
"logps/chosen": -328.9629821777344, |
|
"logps/rejected": -333.85150146484375, |
|
"loss": 0.6117, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5073133111000061, |
|
"rewards/margins": 0.3033245801925659, |
|
"rewards/rejected": -0.8106378316879272, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 15.691147213625676, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": 0.7397656440734863, |
|
"logits/rejected": 0.7417260408401489, |
|
"logps/chosen": -319.7871398925781, |
|
"logps/rejected": -361.149658203125, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5759695172309875, |
|
"rewards/margins": 0.34965625405311584, |
|
"rewards/rejected": -0.9256256818771362, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 16.01928013375132, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": 0.6512678861618042, |
|
"logits/rejected": 0.6891155242919922, |
|
"logps/chosen": -295.92822265625, |
|
"logps/rejected": -338.79608154296875, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5925602316856384, |
|
"rewards/margins": 0.686859130859375, |
|
"rewards/rejected": -1.2794193029403687, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 19.518133270314312, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": 0.5287461876869202, |
|
"logits/rejected": 0.5766854286193848, |
|
"logps/chosen": -400.12713623046875, |
|
"logps/rejected": -398.5772705078125, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6956884264945984, |
|
"rewards/margins": 0.593571662902832, |
|
"rewards/rejected": -1.2892601490020752, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 17.593931676516338, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": 0.5889393091201782, |
|
"logits/rejected": 0.5955343842506409, |
|
"logps/chosen": -341.21307373046875, |
|
"logps/rejected": -389.63897705078125, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6909217238426208, |
|
"rewards/margins": 0.5461858510971069, |
|
"rewards/rejected": -1.237107515335083, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 23.194816745121578, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": 0.48078519105911255, |
|
"logits/rejected": 0.45681601762771606, |
|
"logps/chosen": -333.8201904296875, |
|
"logps/rejected": -357.1300964355469, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6982260346412659, |
|
"rewards/margins": 0.46993082761764526, |
|
"rewards/rejected": -1.1681568622589111, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 18.34717096478732, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": 0.5538076162338257, |
|
"logits/rejected": 0.596636176109314, |
|
"logps/chosen": -352.73931884765625, |
|
"logps/rejected": -347.88494873046875, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5955497622489929, |
|
"rewards/margins": 0.2640233635902405, |
|
"rewards/rejected": -0.8595730662345886, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 18.33581257388434, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": 0.43998318910598755, |
|
"logits/rejected": 0.47118696570396423, |
|
"logps/chosen": -346.0210266113281, |
|
"logps/rejected": -386.53839111328125, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6450529098510742, |
|
"rewards/margins": 0.45645374059677124, |
|
"rewards/rejected": -1.1015068292617798, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 15.98215793865638, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": 0.5963197946548462, |
|
"logits/rejected": 0.6680821180343628, |
|
"logps/chosen": -442.1761779785156, |
|
"logps/rejected": -464.924072265625, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9059039950370789, |
|
"rewards/margins": 0.32418501377105713, |
|
"rewards/rejected": -1.2300891876220703, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 19.34691799566141, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": 0.480471134185791, |
|
"logits/rejected": 0.4600948691368103, |
|
"logps/chosen": -396.38311767578125, |
|
"logps/rejected": -465.755126953125, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6615660786628723, |
|
"rewards/margins": 0.7741894721984863, |
|
"rewards/rejected": -1.4357554912567139, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 21.646866570334893, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": 0.38621702790260315, |
|
"logits/rejected": 0.41974037885665894, |
|
"logps/chosen": -340.9786071777344, |
|
"logps/rejected": -393.6761779785156, |
|
"loss": 0.5754, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7244313955307007, |
|
"rewards/margins": 0.48300299048423767, |
|
"rewards/rejected": -1.2074342966079712, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 18.58507989863696, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": 0.39947646856307983, |
|
"logits/rejected": 0.4246433675289154, |
|
"logps/chosen": -320.23309326171875, |
|
"logps/rejected": -370.582275390625, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.79985511302948, |
|
"rewards/margins": 0.4771038591861725, |
|
"rewards/rejected": -1.2769590616226196, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 20.35280429241367, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": 0.592739999294281, |
|
"logits/rejected": 0.6939637660980225, |
|
"logps/chosen": -408.095947265625, |
|
"logps/rejected": -502.061279296875, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9894254803657532, |
|
"rewards/margins": 1.0690934658050537, |
|
"rewards/rejected": -2.058518886566162, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 26.00633546734353, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": 0.35590919852256775, |
|
"logits/rejected": 0.37258046865463257, |
|
"logps/chosen": -347.53271484375, |
|
"logps/rejected": -367.4189453125, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8420926928520203, |
|
"rewards/margins": 0.4168141484260559, |
|
"rewards/rejected": -1.2589069604873657, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 23.898332577859907, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": 0.3112553358078003, |
|
"logits/rejected": 0.35031309723854065, |
|
"logps/chosen": -382.4200439453125, |
|
"logps/rejected": -385.7547912597656, |
|
"loss": 0.6063, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.877010703086853, |
|
"rewards/margins": 0.20091959834098816, |
|
"rewards/rejected": -1.0779304504394531, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 19.075991074028558, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": 0.3584078848361969, |
|
"logits/rejected": 0.4070889949798584, |
|
"logps/chosen": -362.82464599609375, |
|
"logps/rejected": -392.70263671875, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8209225535392761, |
|
"rewards/margins": 0.5464334487915039, |
|
"rewards/rejected": -1.3673560619354248, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 21.07807830955708, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": 0.45863986015319824, |
|
"logits/rejected": 0.4786900579929352, |
|
"logps/chosen": -357.07562255859375, |
|
"logps/rejected": -361.09991455078125, |
|
"loss": 0.5894, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8064752817153931, |
|
"rewards/margins": 0.33143192529678345, |
|
"rewards/rejected": -1.1379071474075317, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 18.051907454620828, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": 0.3541174530982971, |
|
"logits/rejected": 0.38320186734199524, |
|
"logps/chosen": -377.2417297363281, |
|
"logps/rejected": -423.63092041015625, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5865625143051147, |
|
"rewards/margins": 0.7768771648406982, |
|
"rewards/rejected": -1.3634397983551025, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 22.343500765452667, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": 0.38612139225006104, |
|
"logits/rejected": 0.4179624617099762, |
|
"logps/chosen": -313.2272033691406, |
|
"logps/rejected": -380.7272033691406, |
|
"loss": 0.5894, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6393442749977112, |
|
"rewards/margins": 0.4778464436531067, |
|
"rewards/rejected": -1.1171907186508179, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 17.19888281834333, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": 0.38400429487228394, |
|
"logits/rejected": 0.4022274911403656, |
|
"logps/chosen": -363.2855224609375, |
|
"logps/rejected": -448.18951416015625, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6849557757377625, |
|
"rewards/margins": 1.044877529144287, |
|
"rewards/rejected": -1.7298332452774048, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 20.20842626295358, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": 0.42639732360839844, |
|
"logits/rejected": 0.41247129440307617, |
|
"logps/chosen": -354.7998046875, |
|
"logps/rejected": -369.0785827636719, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6992335915565491, |
|
"rewards/margins": 0.3163672387599945, |
|
"rewards/rejected": -1.0156008005142212, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 19.600972978229347, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": 0.5787872672080994, |
|
"logits/rejected": 0.5056115388870239, |
|
"logps/chosen": -356.230712890625, |
|
"logps/rejected": -428.4977111816406, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8336772918701172, |
|
"rewards/margins": 0.7834388017654419, |
|
"rewards/rejected": -1.6171162128448486, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 18.631627455895288, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": 0.5892560482025146, |
|
"logits/rejected": 0.6063620448112488, |
|
"logps/chosen": -355.5292053222656, |
|
"logps/rejected": -388.40380859375, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7971435785293579, |
|
"rewards/margins": 0.25393742322921753, |
|
"rewards/rejected": -1.0510809421539307, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 17.022161111644575, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": 0.46557942032814026, |
|
"logits/rejected": 0.48981013894081116, |
|
"logps/chosen": -307.93316650390625, |
|
"logps/rejected": -381.57257080078125, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6179824471473694, |
|
"rewards/margins": 0.7625503540039062, |
|
"rewards/rejected": -1.3805327415466309, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 20.84884348212129, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": 0.4536976218223572, |
|
"logits/rejected": 0.4783639907836914, |
|
"logps/chosen": -462.7845764160156, |
|
"logps/rejected": -509.302490234375, |
|
"loss": 0.5778, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3845326900482178, |
|
"rewards/margins": 0.4757843613624573, |
|
"rewards/rejected": -1.8603169918060303, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 17.49618245187623, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": 0.432155042886734, |
|
"logits/rejected": 0.4641633927822113, |
|
"logps/chosen": -375.1294250488281, |
|
"logps/rejected": -427.8644104003906, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8244571685791016, |
|
"rewards/margins": 0.5350643992424011, |
|
"rewards/rejected": -1.3595216274261475, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 22.293080856613333, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": 0.5172699093818665, |
|
"logits/rejected": 0.5119847655296326, |
|
"logps/chosen": -402.71343994140625, |
|
"logps/rejected": -483.53826904296875, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9808033108711243, |
|
"rewards/margins": 0.9733982086181641, |
|
"rewards/rejected": -1.9542014598846436, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 25.62636571756774, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": 0.5554766058921814, |
|
"logits/rejected": 0.49316105246543884, |
|
"logps/chosen": -416.37054443359375, |
|
"logps/rejected": -398.11151123046875, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9982360601425171, |
|
"rewards/margins": 0.581201434135437, |
|
"rewards/rejected": -1.5794376134872437, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 23.381466639480767, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": 0.4861147403717041, |
|
"logits/rejected": 0.47837033867836, |
|
"logps/chosen": -430.23321533203125, |
|
"logps/rejected": -403.3463439941406, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.9009541273117065, |
|
"rewards/margins": 0.359774112701416, |
|
"rewards/rejected": -1.260728120803833, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 18.23110087924843, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": 0.5253990888595581, |
|
"logits/rejected": 0.4784235954284668, |
|
"logps/chosen": -447.5752868652344, |
|
"logps/rejected": -473.0809631347656, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9549976587295532, |
|
"rewards/margins": 0.6409324407577515, |
|
"rewards/rejected": -1.5959299802780151, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 19.550183415249666, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": 0.45140281319618225, |
|
"logits/rejected": 0.5012277960777283, |
|
"logps/chosen": -384.2790832519531, |
|
"logps/rejected": -401.28509521484375, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8066235780715942, |
|
"rewards/margins": 0.3241332173347473, |
|
"rewards/rejected": -1.1307567358016968, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 20.922330115257477, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": 0.43720340728759766, |
|
"logits/rejected": 0.5218846797943115, |
|
"logps/chosen": -401.09423828125, |
|
"logps/rejected": -409.0234375, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0844993591308594, |
|
"rewards/margins": 0.5277708768844604, |
|
"rewards/rejected": -1.6122701168060303, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 20.924843509053616, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": 0.5131865739822388, |
|
"logits/rejected": 0.5137609243392944, |
|
"logps/chosen": -407.2503356933594, |
|
"logps/rejected": -483.7713928222656, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2562562227249146, |
|
"rewards/margins": 0.7707854509353638, |
|
"rewards/rejected": -2.0270419120788574, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 22.18855281521568, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": 0.6776459217071533, |
|
"logits/rejected": 0.6498035788536072, |
|
"logps/chosen": -403.9708557128906, |
|
"logps/rejected": -523.07177734375, |
|
"loss": 0.5635, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1393872499465942, |
|
"rewards/margins": 0.7758584022521973, |
|
"rewards/rejected": -1.9152456521987915, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 21.52071416914918, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": 0.5453814268112183, |
|
"logits/rejected": 0.6066008806228638, |
|
"logps/chosen": -425.591796875, |
|
"logps/rejected": -567.7387084960938, |
|
"loss": 0.5724, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3399444818496704, |
|
"rewards/margins": 1.0568122863769531, |
|
"rewards/rejected": -2.396756887435913, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 20.60341994179207, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": 0.6149223446846008, |
|
"logits/rejected": 0.6715623736381531, |
|
"logps/chosen": -438.75714111328125, |
|
"logps/rejected": -463.2662048339844, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1835300922393799, |
|
"rewards/margins": 0.9701372981071472, |
|
"rewards/rejected": -2.153667449951172, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 28.539316703811078, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": 0.5007244348526001, |
|
"logits/rejected": 0.5865780711174011, |
|
"logps/chosen": -385.4056701660156, |
|
"logps/rejected": -439.2000427246094, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0280896425247192, |
|
"rewards/margins": 0.6651249527931213, |
|
"rewards/rejected": -1.6932144165039062, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 19.865896292426036, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": 0.40418606996536255, |
|
"logits/rejected": 0.3820621371269226, |
|
"logps/chosen": -416.3582458496094, |
|
"logps/rejected": -402.7046203613281, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3050010204315186, |
|
"rewards/margins": 0.48114338517189026, |
|
"rewards/rejected": -1.786144495010376, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 24.85634399900053, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": 0.5298096537590027, |
|
"logits/rejected": 0.5856395959854126, |
|
"logps/chosen": -398.751708984375, |
|
"logps/rejected": -430.79644775390625, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9989835619926453, |
|
"rewards/margins": 0.6439894437789917, |
|
"rewards/rejected": -1.6429731845855713, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 20.86937531865178, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": 0.5040396451950073, |
|
"logits/rejected": 0.5275683999061584, |
|
"logps/chosen": -379.1663513183594, |
|
"logps/rejected": -476.4314880371094, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9181860089302063, |
|
"rewards/margins": 0.8310756683349609, |
|
"rewards/rejected": -1.749261498451233, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 23.1819970794382, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": 0.49801892042160034, |
|
"logits/rejected": 0.5010608434677124, |
|
"logps/chosen": -438.33074951171875, |
|
"logps/rejected": -599.0955810546875, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3764212131500244, |
|
"rewards/margins": 1.2255936861038208, |
|
"rewards/rejected": -2.6020150184631348, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 30.911649534486866, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": 0.43349236249923706, |
|
"logits/rejected": 0.5261619687080383, |
|
"logps/chosen": -420.1570739746094, |
|
"logps/rejected": -584.062744140625, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3903928995132446, |
|
"rewards/margins": 1.3632363080978394, |
|
"rewards/rejected": -2.753629207611084, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 24.211694093238176, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": 0.21989259123802185, |
|
"logits/rejected": 0.2885194420814514, |
|
"logps/chosen": -407.5654296875, |
|
"logps/rejected": -481.28448486328125, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.104230523109436, |
|
"rewards/margins": 1.0199384689331055, |
|
"rewards/rejected": -2.124168872833252, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 25.049981314231243, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": 0.3270975947380066, |
|
"logits/rejected": 0.33125847578048706, |
|
"logps/chosen": -427.4678649902344, |
|
"logps/rejected": -485.11590576171875, |
|
"loss": 0.5704, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1138917207717896, |
|
"rewards/margins": 0.8963919878005981, |
|
"rewards/rejected": -2.0102837085723877, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 21.939690926454343, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": 0.4945638179779053, |
|
"logits/rejected": 0.4895549416542053, |
|
"logps/chosen": -299.2240295410156, |
|
"logps/rejected": -380.4999084472656, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8892961740493774, |
|
"rewards/margins": 0.7147836089134216, |
|
"rewards/rejected": -1.6040798425674438, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 23.8591260962465, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": 0.4953368604183197, |
|
"logits/rejected": 0.5128699541091919, |
|
"logps/chosen": -334.7842712402344, |
|
"logps/rejected": -490.0814514160156, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.113507628440857, |
|
"rewards/margins": 1.3171967267990112, |
|
"rewards/rejected": -2.430704355239868, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 20.28733876953354, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": 0.46798864006996155, |
|
"logits/rejected": 0.5210005044937134, |
|
"logps/chosen": -332.076904296875, |
|
"logps/rejected": -446.70513916015625, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9099678993225098, |
|
"rewards/margins": 1.131079912185669, |
|
"rewards/rejected": -2.041048049926758, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 24.87749514003154, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": 0.3501337170600891, |
|
"logits/rejected": 0.44954681396484375, |
|
"logps/chosen": -417.104736328125, |
|
"logps/rejected": -438.91363525390625, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1025865077972412, |
|
"rewards/margins": 0.5667014122009277, |
|
"rewards/rejected": -1.669287919998169, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 27.415480953957644, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": 0.47420987486839294, |
|
"logits/rejected": 0.5211541652679443, |
|
"logps/chosen": -319.5274353027344, |
|
"logps/rejected": -489.9903869628906, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1082285642623901, |
|
"rewards/margins": 1.3356571197509766, |
|
"rewards/rejected": -2.443885564804077, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 29.146901804579972, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": 0.4965229034423828, |
|
"logits/rejected": 0.5144367814064026, |
|
"logps/chosen": -405.8136291503906, |
|
"logps/rejected": -467.6885681152344, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3548388481140137, |
|
"rewards/margins": 0.7498202919960022, |
|
"rewards/rejected": -2.10465931892395, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 22.234128106063636, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": 0.4783782362937927, |
|
"logits/rejected": 0.5079411864280701, |
|
"logps/chosen": -382.99554443359375, |
|
"logps/rejected": -473.92413330078125, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.059499979019165, |
|
"rewards/margins": 1.3445496559143066, |
|
"rewards/rejected": -2.4040493965148926, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 26.81987709091395, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": 0.3504638969898224, |
|
"logits/rejected": 0.40155625343322754, |
|
"logps/chosen": -391.93609619140625, |
|
"logps/rejected": -485.8604431152344, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.090312123298645, |
|
"rewards/margins": 1.276283860206604, |
|
"rewards/rejected": -2.366596221923828, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 28.479329727395974, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": 0.4352048337459564, |
|
"logits/rejected": 0.439187228679657, |
|
"logps/chosen": -401.4508361816406, |
|
"logps/rejected": -479.16046142578125, |
|
"loss": 0.5459, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2591068744659424, |
|
"rewards/margins": 1.0630078315734863, |
|
"rewards/rejected": -2.3221144676208496, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 24.36280340279443, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": 0.3423922657966614, |
|
"logits/rejected": 0.3217991888523102, |
|
"logps/chosen": -408.8718566894531, |
|
"logps/rejected": -476.89349365234375, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2676352262496948, |
|
"rewards/margins": 1.0237175226211548, |
|
"rewards/rejected": -2.2913527488708496, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 23.976543809274407, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": 0.4693296551704407, |
|
"logits/rejected": 0.4371146261692047, |
|
"logps/chosen": -361.5362548828125, |
|
"logps/rejected": -458.41119384765625, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9556007385253906, |
|
"rewards/margins": 0.8090912103652954, |
|
"rewards/rejected": -1.764691948890686, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 26.124294099974634, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": 0.33999913930892944, |
|
"logits/rejected": 0.3430488705635071, |
|
"logps/chosen": -358.56488037109375, |
|
"logps/rejected": -402.3939514160156, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.095595121383667, |
|
"rewards/margins": 0.5801312327384949, |
|
"rewards/rejected": -1.675726294517517, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 20.33193428165945, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": 0.4010500907897949, |
|
"logits/rejected": 0.40270981192588806, |
|
"logps/chosen": -356.3854064941406, |
|
"logps/rejected": -456.36859130859375, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9084610939025879, |
|
"rewards/margins": 0.8726455569267273, |
|
"rewards/rejected": -1.7811065912246704, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 20.611238253632102, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": 0.42909201979637146, |
|
"logits/rejected": 0.4563824236392975, |
|
"logps/chosen": -440.75994873046875, |
|
"logps/rejected": -490.83575439453125, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2483009099960327, |
|
"rewards/margins": 0.8814948201179504, |
|
"rewards/rejected": -2.129795789718628, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 20.981763353845743, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": 0.4369926452636719, |
|
"logits/rejected": 0.4608924984931946, |
|
"logps/chosen": -429.01239013671875, |
|
"logps/rejected": -467.69207763671875, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2999002933502197, |
|
"rewards/margins": 0.5992451906204224, |
|
"rewards/rejected": -1.899145483970642, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 27.79991213917318, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": 0.35042619705200195, |
|
"logits/rejected": 0.3954378664493561, |
|
"logps/chosen": -357.94622802734375, |
|
"logps/rejected": -463.93359375, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8437250852584839, |
|
"rewards/margins": 1.2400966882705688, |
|
"rewards/rejected": -2.0838217735290527, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 23.81723686863117, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": 0.3485955595970154, |
|
"logits/rejected": 0.41709309816360474, |
|
"logps/chosen": -413.65496826171875, |
|
"logps/rejected": -482.4605407714844, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.308464765548706, |
|
"rewards/margins": 0.9804670214653015, |
|
"rewards/rejected": -2.2889316082000732, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 26.981005902457373, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": 0.21841764450073242, |
|
"logits/rejected": 0.3175281882286072, |
|
"logps/chosen": -473.5174865722656, |
|
"logps/rejected": -490.5992126464844, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4544774293899536, |
|
"rewards/margins": 0.8454269170761108, |
|
"rewards/rejected": -2.2999043464660645, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 25.852778743297964, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": 0.4441685676574707, |
|
"logits/rejected": 0.5196029543876648, |
|
"logps/chosen": -418.32501220703125, |
|
"logps/rejected": -412.47015380859375, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4650938510894775, |
|
"rewards/margins": 0.5241371989250183, |
|
"rewards/rejected": -1.9892311096191406, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 24.34023435468119, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": 0.5028194785118103, |
|
"logits/rejected": 0.5491822361946106, |
|
"logps/chosen": -414.00372314453125, |
|
"logps/rejected": -485.53900146484375, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2709230184555054, |
|
"rewards/margins": 1.1440144777297974, |
|
"rewards/rejected": -2.4149374961853027, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 34.608951645650336, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": 0.5384331941604614, |
|
"logits/rejected": 0.5625046491622925, |
|
"logps/chosen": -426.2798767089844, |
|
"logps/rejected": -554.5643310546875, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1276309490203857, |
|
"rewards/margins": 1.2486127614974976, |
|
"rewards/rejected": -2.3762435913085938, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 21.349543094305762, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": 0.4867774546146393, |
|
"logits/rejected": 0.5194205045700073, |
|
"logps/chosen": -423.35272216796875, |
|
"logps/rejected": -517.1748046875, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.104402780532837, |
|
"rewards/margins": 1.1897287368774414, |
|
"rewards/rejected": -2.2941317558288574, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 22.932371419845182, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": 0.6204196214675903, |
|
"logits/rejected": 0.6645633578300476, |
|
"logps/chosen": -423.0869140625, |
|
"logps/rejected": -528.7664184570312, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.224035382270813, |
|
"rewards/margins": 1.2077516317367554, |
|
"rewards/rejected": -2.4317872524261475, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 25.20618572072171, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": 0.3865453600883484, |
|
"logits/rejected": 0.4525006413459778, |
|
"logps/chosen": -353.2498474121094, |
|
"logps/rejected": -409.4380798339844, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9723296165466309, |
|
"rewards/margins": 0.8267601728439331, |
|
"rewards/rejected": -1.799089789390564, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 38.18407998689627, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": 0.4781351685523987, |
|
"logits/rejected": 0.5323950052261353, |
|
"logps/chosen": -413.47967529296875, |
|
"logps/rejected": -474.1600036621094, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4541221857070923, |
|
"rewards/margins": 0.8590675592422485, |
|
"rewards/rejected": -2.313189744949341, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 24.09623950016077, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": 0.5714893341064453, |
|
"logits/rejected": 0.6005902290344238, |
|
"logps/chosen": -408.2439270019531, |
|
"logps/rejected": -502.76617431640625, |
|
"loss": 0.5289, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2594388723373413, |
|
"rewards/margins": 1.0005667209625244, |
|
"rewards/rejected": -2.260005474090576, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 28.6113600733398, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": 0.6574729681015015, |
|
"logits/rejected": 0.748490571975708, |
|
"logps/chosen": -373.8766784667969, |
|
"logps/rejected": -522.918701171875, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1603968143463135, |
|
"rewards/margins": 1.344420075416565, |
|
"rewards/rejected": -2.504816770553589, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 22.790330638281535, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": 0.5872513651847839, |
|
"logits/rejected": 0.6414788961410522, |
|
"logps/chosen": -417.19696044921875, |
|
"logps/rejected": -574.2335815429688, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2055175304412842, |
|
"rewards/margins": 1.7793638706207275, |
|
"rewards/rejected": -2.9848814010620117, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 26.978273454954337, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": 0.5491480827331543, |
|
"logits/rejected": 0.6279400587081909, |
|
"logps/chosen": -430.430908203125, |
|
"logps/rejected": -508.8794860839844, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3094874620437622, |
|
"rewards/margins": 1.0253905057907104, |
|
"rewards/rejected": -2.3348779678344727, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 24.715640254579753, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": 0.4640998840332031, |
|
"logits/rejected": 0.5079622268676758, |
|
"logps/chosen": -419.351806640625, |
|
"logps/rejected": -523.3448486328125, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0731154680252075, |
|
"rewards/margins": 1.3017503023147583, |
|
"rewards/rejected": -2.374865770339966, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 26.4759501785462, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": 0.4973793029785156, |
|
"logits/rejected": 0.4787571430206299, |
|
"logps/chosen": -374.91082763671875, |
|
"logps/rejected": -518.9568481445312, |
|
"loss": 0.52, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1492761373519897, |
|
"rewards/margins": 1.374403953552246, |
|
"rewards/rejected": -2.5236802101135254, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 24.37371579733604, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": 0.6322020888328552, |
|
"logits/rejected": 0.676570475101471, |
|
"logps/chosen": -413.09967041015625, |
|
"logps/rejected": -491.43280029296875, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3825113773345947, |
|
"rewards/margins": 0.9496244192123413, |
|
"rewards/rejected": -2.3321359157562256, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 33.069428752057235, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": 0.6233787536621094, |
|
"logits/rejected": 0.6681519150733948, |
|
"logps/chosen": -405.11572265625, |
|
"logps/rejected": -605.8905029296875, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4363985061645508, |
|
"rewards/margins": 1.7567567825317383, |
|
"rewards/rejected": -3.193154811859131, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 28.260532330049596, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": 0.5473383665084839, |
|
"logits/rejected": 0.5997258424758911, |
|
"logps/chosen": -350.90234375, |
|
"logps/rejected": -436.4149475097656, |
|
"loss": 0.5328, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.217137098312378, |
|
"rewards/margins": 0.5057491064071655, |
|
"rewards/rejected": -1.722886323928833, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 22.284111196687974, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": 0.6474301815032959, |
|
"logits/rejected": 0.660786509513855, |
|
"logps/chosen": -404.047607421875, |
|
"logps/rejected": -641.9457397460938, |
|
"loss": 0.5045, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5259559154510498, |
|
"rewards/margins": 2.1374330520629883, |
|
"rewards/rejected": -3.66338849067688, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 25.147753437467735, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": 0.622474193572998, |
|
"logits/rejected": 0.6864532828330994, |
|
"logps/chosen": -511.6786193847656, |
|
"logps/rejected": -565.4790649414062, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4496666193008423, |
|
"rewards/margins": 1.0919597148895264, |
|
"rewards/rejected": -2.541626214981079, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 30.072916780925627, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": 0.6727067232131958, |
|
"logits/rejected": 0.6959071159362793, |
|
"logps/chosen": -438.89453125, |
|
"logps/rejected": -549.9530029296875, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4524840116500854, |
|
"rewards/margins": 1.1177465915679932, |
|
"rewards/rejected": -2.570230722427368, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 28.079457804957293, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": 0.49242812395095825, |
|
"logits/rejected": 0.5447872877120972, |
|
"logps/chosen": -427.9210510253906, |
|
"logps/rejected": -566.0662841796875, |
|
"loss": 0.4829, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2132065296173096, |
|
"rewards/margins": 1.4744932651519775, |
|
"rewards/rejected": -2.687699794769287, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 26.037654165610427, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": 0.531968355178833, |
|
"logits/rejected": 0.5523951053619385, |
|
"logps/chosen": -384.9483337402344, |
|
"logps/rejected": -471.51300048828125, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4794952869415283, |
|
"rewards/margins": 1.0920584201812744, |
|
"rewards/rejected": -2.5715537071228027, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 26.5263500776438, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": 0.33142679929733276, |
|
"logits/rejected": 0.3150361478328705, |
|
"logps/chosen": -445.0096130371094, |
|
"logps/rejected": -515.8156127929688, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3551602363586426, |
|
"rewards/margins": 0.6439944505691528, |
|
"rewards/rejected": -1.9991546869277954, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 23.152212301904747, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": 0.5838603973388672, |
|
"logits/rejected": 0.6234208345413208, |
|
"logps/chosen": -459.1851501464844, |
|
"logps/rejected": -571.7786254882812, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4499067068099976, |
|
"rewards/margins": 1.524433970451355, |
|
"rewards/rejected": -2.9743404388427734, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 25.927981383141496, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": 0.44739946722984314, |
|
"logits/rejected": 0.41936665773391724, |
|
"logps/chosen": -408.06536865234375, |
|
"logps/rejected": -458.7378845214844, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.203150749206543, |
|
"rewards/margins": 0.6691651940345764, |
|
"rewards/rejected": -1.8723161220550537, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 29.57680066351449, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": 0.4725145399570465, |
|
"logits/rejected": 0.5294008255004883, |
|
"logps/chosen": -469.10107421875, |
|
"logps/rejected": -531.1735229492188, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3328118324279785, |
|
"rewards/margins": 1.1033190488815308, |
|
"rewards/rejected": -2.436131000518799, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 29.953731902113084, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": 0.5199874639511108, |
|
"logits/rejected": 0.5354346036911011, |
|
"logps/chosen": -342.24407958984375, |
|
"logps/rejected": -429.68341064453125, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9911921620368958, |
|
"rewards/margins": 0.44350871443748474, |
|
"rewards/rejected": -1.4347009658813477, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 22.682880403794634, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": 0.5494628548622131, |
|
"logits/rejected": 0.5927091836929321, |
|
"logps/chosen": -379.50164794921875, |
|
"logps/rejected": -497.7547302246094, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1960418224334717, |
|
"rewards/margins": 1.1193139553070068, |
|
"rewards/rejected": -2.3153557777404785, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 22.940020552459323, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": 0.65094393491745, |
|
"logits/rejected": 0.6770426630973816, |
|
"logps/chosen": -493.34033203125, |
|
"logps/rejected": -730.2711181640625, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.918515920639038, |
|
"rewards/margins": 2.4283804893493652, |
|
"rewards/rejected": -4.346896171569824, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 25.89668875929957, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": 0.5813716650009155, |
|
"logits/rejected": 0.5926984548568726, |
|
"logps/chosen": -397.17742919921875, |
|
"logps/rejected": -445.9786682128906, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2316924333572388, |
|
"rewards/margins": 0.7258312702178955, |
|
"rewards/rejected": -1.9575237035751343, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 28.005322379907067, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": 0.47907742857933044, |
|
"logits/rejected": 0.5216917395591736, |
|
"logps/chosen": -399.86456298828125, |
|
"logps/rejected": -612.7337036132812, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.352043628692627, |
|
"rewards/margins": 1.677664041519165, |
|
"rewards/rejected": -3.029707431793213, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 27.73704631088212, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": 0.6081745624542236, |
|
"logits/rejected": 0.583394467830658, |
|
"logps/chosen": -376.72491455078125, |
|
"logps/rejected": -477.5032653808594, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.322222113609314, |
|
"rewards/margins": 0.8721188306808472, |
|
"rewards/rejected": -2.194340944290161, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 26.265554246609113, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": 0.6204389333724976, |
|
"logits/rejected": 0.6838095188140869, |
|
"logps/chosen": -449.0771484375, |
|
"logps/rejected": -474.48614501953125, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.383966326713562, |
|
"rewards/margins": 0.7637938261032104, |
|
"rewards/rejected": -2.1477601528167725, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 29.348434987188984, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": 0.6158981323242188, |
|
"logits/rejected": 0.6594232320785522, |
|
"logps/chosen": -355.5142517089844, |
|
"logps/rejected": -373.458740234375, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9645657539367676, |
|
"rewards/margins": 0.5557066202163696, |
|
"rewards/rejected": -1.5202723741531372, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 27.893225157055635, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": 0.5417597889900208, |
|
"logits/rejected": 0.5491499900817871, |
|
"logps/chosen": -412.00408935546875, |
|
"logps/rejected": -480.2135314941406, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0488494634628296, |
|
"rewards/margins": 0.9427961111068726, |
|
"rewards/rejected": -1.9916454553604126, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 27.523271320708126, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": 0.6551875472068787, |
|
"logits/rejected": 0.6813570857048035, |
|
"logps/chosen": -417.86602783203125, |
|
"logps/rejected": -490.6324768066406, |
|
"loss": 0.5156, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4586915969848633, |
|
"rewards/margins": 0.9774697422981262, |
|
"rewards/rejected": -2.436161518096924, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 27.996097883290435, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": 0.5485438108444214, |
|
"logits/rejected": 0.5184262990951538, |
|
"logps/chosen": -395.86700439453125, |
|
"logps/rejected": -491.7772521972656, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6062934398651123, |
|
"rewards/margins": 0.8541345596313477, |
|
"rewards/rejected": -2.46042799949646, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 29.83078441321164, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": 0.604858934879303, |
|
"logits/rejected": 0.6166650056838989, |
|
"logps/chosen": -440.02288818359375, |
|
"logps/rejected": -557.9898681640625, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7407538890838623, |
|
"rewards/margins": 1.2068676948547363, |
|
"rewards/rejected": -2.9476218223571777, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 31.550622486136575, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": 0.5251548886299133, |
|
"logits/rejected": 0.5625226497650146, |
|
"logps/chosen": -412.3245544433594, |
|
"logps/rejected": -622.3465576171875, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4821490049362183, |
|
"rewards/margins": 2.1304643154144287, |
|
"rewards/rejected": -3.6126132011413574, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 36.283592176451926, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": 0.49148011207580566, |
|
"logits/rejected": 0.5424791574478149, |
|
"logps/chosen": -365.7886962890625, |
|
"logps/rejected": -534.2448120117188, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1752984523773193, |
|
"rewards/margins": 1.5028009414672852, |
|
"rewards/rejected": -2.6780993938446045, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 28.465184644606882, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": 0.5210837721824646, |
|
"logits/rejected": 0.559233546257019, |
|
"logps/chosen": -434.03173828125, |
|
"logps/rejected": -449.0094299316406, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2292797565460205, |
|
"rewards/margins": 0.6202489733695984, |
|
"rewards/rejected": -1.8495289087295532, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 26.71337589674436, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": 0.5467537045478821, |
|
"logits/rejected": 0.5603493452072144, |
|
"logps/chosen": -381.9140930175781, |
|
"logps/rejected": -523.8021240234375, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.217504858970642, |
|
"rewards/margins": 1.6663074493408203, |
|
"rewards/rejected": -2.883812427520752, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 21.803578934970087, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": 0.5511429905891418, |
|
"logits/rejected": 0.5506604909896851, |
|
"logps/chosen": -393.09063720703125, |
|
"logps/rejected": -503.272705078125, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1632798910140991, |
|
"rewards/margins": 0.7249369025230408, |
|
"rewards/rejected": -1.8882166147232056, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 23.93105714760726, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": 0.5443588495254517, |
|
"logits/rejected": 0.6058872938156128, |
|
"logps/chosen": -424.76654052734375, |
|
"logps/rejected": -503.58905029296875, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1997069120407104, |
|
"rewards/margins": 0.5387487411499023, |
|
"rewards/rejected": -1.7384557723999023, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 23.947804764647334, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": 0.5812084078788757, |
|
"logits/rejected": 0.66224205493927, |
|
"logps/chosen": -391.9268493652344, |
|
"logps/rejected": -514.671142578125, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2656793594360352, |
|
"rewards/margins": 1.1044799089431763, |
|
"rewards/rejected": -2.37015962600708, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 41.384778071002756, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": 0.7019360065460205, |
|
"logits/rejected": 0.7781286239624023, |
|
"logps/chosen": -434.8446350097656, |
|
"logps/rejected": -576.0948486328125, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8481266498565674, |
|
"rewards/margins": 1.3344603776931763, |
|
"rewards/rejected": -3.182587146759033, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 26.427988861693198, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": 0.48930302262306213, |
|
"logits/rejected": 0.5008028745651245, |
|
"logps/chosen": -444.37188720703125, |
|
"logps/rejected": -591.009521484375, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6480128765106201, |
|
"rewards/margins": 1.4180339574813843, |
|
"rewards/rejected": -3.066046714782715, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 28.520099902935833, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": 0.5077411532402039, |
|
"logits/rejected": 0.5465759634971619, |
|
"logps/chosen": -451.1187438964844, |
|
"logps/rejected": -498.052001953125, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2567503452301025, |
|
"rewards/margins": 0.9746766090393066, |
|
"rewards/rejected": -2.231426954269409, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 30.554823939974213, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": 0.5454415082931519, |
|
"logits/rejected": 0.5317539572715759, |
|
"logps/chosen": -392.7830810546875, |
|
"logps/rejected": -589.8780517578125, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2405519485473633, |
|
"rewards/margins": 1.7801573276519775, |
|
"rewards/rejected": -3.020709276199341, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 29.80571254516348, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": 0.4539203643798828, |
|
"logits/rejected": 0.4859291613101959, |
|
"logps/chosen": -468.7974548339844, |
|
"logps/rejected": -593.902587890625, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4451267719268799, |
|
"rewards/margins": 1.7116448879241943, |
|
"rewards/rejected": -3.156771659851074, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 25.06647355845889, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": 0.5843818783760071, |
|
"logits/rejected": 0.6344080567359924, |
|
"logps/chosen": -448.7802734375, |
|
"logps/rejected": -481.861572265625, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.483563780784607, |
|
"rewards/margins": 0.7844194173812866, |
|
"rewards/rejected": -2.2679829597473145, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 36.388819937688396, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": 0.5060101747512817, |
|
"logits/rejected": 0.5356935858726501, |
|
"logps/chosen": -380.41058349609375, |
|
"logps/rejected": -483.8663024902344, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3735358715057373, |
|
"rewards/margins": 1.4581668376922607, |
|
"rewards/rejected": -2.831702709197998, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 29.77097075710584, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": 0.49516528844833374, |
|
"logits/rejected": 0.48991116881370544, |
|
"logps/chosen": -502.56036376953125, |
|
"logps/rejected": -589.65234375, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8622710704803467, |
|
"rewards/margins": 1.399813175201416, |
|
"rewards/rejected": -3.2620842456817627, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 24.214142767465663, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": 0.5962218642234802, |
|
"logits/rejected": 0.5794373750686646, |
|
"logps/chosen": -385.8623962402344, |
|
"logps/rejected": -487.9881896972656, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3321025371551514, |
|
"rewards/margins": 0.9605873823165894, |
|
"rewards/rejected": -2.292689800262451, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 23.80846287144373, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": 0.4128933548927307, |
|
"logits/rejected": 0.46047353744506836, |
|
"logps/chosen": -453.2511291503906, |
|
"logps/rejected": -729.1717529296875, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5917387008666992, |
|
"rewards/margins": 2.3557941913604736, |
|
"rewards/rejected": -3.947533369064331, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 26.22197096361496, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": 0.3974430561065674, |
|
"logits/rejected": 0.4507156312465668, |
|
"logps/chosen": -367.8086853027344, |
|
"logps/rejected": -498.85888671875, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.061302900314331, |
|
"rewards/margins": 1.3938474655151367, |
|
"rewards/rejected": -2.4551501274108887, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 32.08569403406024, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": 0.45793819427490234, |
|
"logits/rejected": 0.4204599857330322, |
|
"logps/chosen": -417.1183166503906, |
|
"logps/rejected": -460.21307373046875, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1381946802139282, |
|
"rewards/margins": 0.8046306371688843, |
|
"rewards/rejected": -1.9428255558013916, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 37.980694296967876, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": 0.5375388860702515, |
|
"logits/rejected": 0.5443367958068848, |
|
"logps/chosen": -446.30975341796875, |
|
"logps/rejected": -508.3763732910156, |
|
"loss": 0.539, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3490548133850098, |
|
"rewards/margins": 1.0032390356063843, |
|
"rewards/rejected": -2.3522937297821045, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 26.666657891859266, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": 0.3666940927505493, |
|
"logits/rejected": 0.43050676584243774, |
|
"logps/chosen": -452.58447265625, |
|
"logps/rejected": -585.009521484375, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3998275995254517, |
|
"rewards/margins": 1.149751901626587, |
|
"rewards/rejected": -2.54957914352417, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 24.096006375464146, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": 0.4738024175167084, |
|
"logits/rejected": 0.5199214816093445, |
|
"logps/chosen": -412.96148681640625, |
|
"logps/rejected": -558.6272583007812, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5531818866729736, |
|
"rewards/margins": 1.2759530544281006, |
|
"rewards/rejected": -2.829134702682495, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 33.47202296615829, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": 0.5434667468070984, |
|
"logits/rejected": 0.5950125455856323, |
|
"logps/chosen": -384.5709228515625, |
|
"logps/rejected": -510.760009765625, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5631451606750488, |
|
"rewards/margins": 1.1664178371429443, |
|
"rewards/rejected": -2.7295632362365723, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 43.06627660738899, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": 0.43661123514175415, |
|
"logits/rejected": 0.4971885681152344, |
|
"logps/chosen": -351.27423095703125, |
|
"logps/rejected": -520.4568481445312, |
|
"loss": 0.5242, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.249340534210205, |
|
"rewards/margins": 1.5152721405029297, |
|
"rewards/rejected": -2.7646126747131348, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 26.713110356503616, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": 0.5615112781524658, |
|
"logits/rejected": 0.5642154216766357, |
|
"logps/chosen": -469.04559326171875, |
|
"logps/rejected": -581.6990966796875, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8216552734375, |
|
"rewards/margins": 1.0309664011001587, |
|
"rewards/rejected": -2.8526217937469482, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 22.71250250046501, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": 0.47569403052330017, |
|
"logits/rejected": 0.46433067321777344, |
|
"logps/chosen": -456.00799560546875, |
|
"logps/rejected": -559.7556762695312, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5954478979110718, |
|
"rewards/margins": 1.2076561450958252, |
|
"rewards/rejected": -2.8031039237976074, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 37.7068693885991, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": 0.32526105642318726, |
|
"logits/rejected": 0.3423188328742981, |
|
"logps/chosen": -531.8027954101562, |
|
"logps/rejected": -669.9105224609375, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.001676082611084, |
|
"rewards/margins": 1.3426449298858643, |
|
"rewards/rejected": -3.3443210124969482, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 28.132187213619453, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": 0.47985514998435974, |
|
"logits/rejected": 0.5144001245498657, |
|
"logps/chosen": -452.16876220703125, |
|
"logps/rejected": -552.7636108398438, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7105381488800049, |
|
"rewards/margins": 0.9188302755355835, |
|
"rewards/rejected": -2.629368305206299, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 38.88584921816368, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": 0.35465139150619507, |
|
"logits/rejected": 0.4013524055480957, |
|
"logps/chosen": -436.73828125, |
|
"logps/rejected": -477.15777587890625, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.45534086227417, |
|
"rewards/margins": 0.4723455011844635, |
|
"rewards/rejected": -1.927686333656311, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 22.941320708531656, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": 0.4294286370277405, |
|
"logits/rejected": 0.4825877547264099, |
|
"logps/chosen": -408.41009521484375, |
|
"logps/rejected": -561.3411254882812, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2056307792663574, |
|
"rewards/margins": 1.601295828819275, |
|
"rewards/rejected": -2.8069262504577637, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 35.45487358226262, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": 0.6182829737663269, |
|
"logits/rejected": 0.6686528921127319, |
|
"logps/chosen": -513.27783203125, |
|
"logps/rejected": -652.1695556640625, |
|
"loss": 0.4855, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7456719875335693, |
|
"rewards/margins": 1.04084050655365, |
|
"rewards/rejected": -2.786512613296509, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 31.60040000561529, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": 0.4577367901802063, |
|
"logits/rejected": 0.47269877791404724, |
|
"logps/chosen": -381.79400634765625, |
|
"logps/rejected": -502.0397033691406, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3730051517486572, |
|
"rewards/margins": 1.0319459438323975, |
|
"rewards/rejected": -2.404951333999634, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 30.53844409715948, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": 0.3548828959465027, |
|
"logits/rejected": 0.3153901696205139, |
|
"logps/chosen": -489.1748962402344, |
|
"logps/rejected": -574.3113403320312, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6738046407699585, |
|
"rewards/margins": 0.8269612193107605, |
|
"rewards/rejected": -2.5007660388946533, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 26.31960630753036, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": 0.5535754561424255, |
|
"logits/rejected": 0.571679949760437, |
|
"logps/chosen": -414.44207763671875, |
|
"logps/rejected": -463.46588134765625, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2445614337921143, |
|
"rewards/margins": 0.9598299860954285, |
|
"rewards/rejected": -2.2043912410736084, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"eval_logits/chosen": 0.5330603122711182, |
|
"eval_logits/rejected": 0.5456880927085876, |
|
"eval_logps/chosen": -420.8943786621094, |
|
"eval_logps/rejected": -539.791015625, |
|
"eval_loss": 0.5041713118553162, |
|
"eval_rewards/accuracies": 0.7875000238418579, |
|
"eval_rewards/chosen": -1.4196977615356445, |
|
"eval_rewards/margins": 1.1845324039459229, |
|
"eval_rewards/rejected": -2.6042304039001465, |
|
"eval_runtime": 98.5276, |
|
"eval_samples_per_second": 45.277, |
|
"eval_steps_per_second": 0.71, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 36.92749686465627, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": 0.5472335815429688, |
|
"logits/rejected": 0.5719302296638489, |
|
"logps/chosen": -478.77862548828125, |
|
"logps/rejected": -559.6343383789062, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7711069583892822, |
|
"rewards/margins": 1.1662440299987793, |
|
"rewards/rejected": -2.9373512268066406, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 29.365570252191475, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": 0.5482034087181091, |
|
"logits/rejected": 0.5884445309638977, |
|
"logps/chosen": -440.0167541503906, |
|
"logps/rejected": -539.4930419921875, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.330872893333435, |
|
"rewards/margins": 1.472027063369751, |
|
"rewards/rejected": -2.8028998374938965, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 26.28817793829581, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": 0.4989794194698334, |
|
"logits/rejected": 0.5507176518440247, |
|
"logps/chosen": -386.51409912109375, |
|
"logps/rejected": -468.39410400390625, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.416949987411499, |
|
"rewards/margins": 0.7112621665000916, |
|
"rewards/rejected": -2.1282119750976562, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 31.006956473511085, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": 0.6589056253433228, |
|
"logits/rejected": 0.6739957332611084, |
|
"logps/chosen": -375.49212646484375, |
|
"logps/rejected": -606.4843139648438, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1116955280303955, |
|
"rewards/margins": 2.418699026107788, |
|
"rewards/rejected": -3.5303947925567627, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 45.236179424056324, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": 0.495623916387558, |
|
"logits/rejected": 0.501970112323761, |
|
"logps/chosen": -460.5111389160156, |
|
"logps/rejected": -548.1988525390625, |
|
"loss": 0.5101, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.778485655784607, |
|
"rewards/margins": 0.600300669670105, |
|
"rewards/rejected": -2.378786087036133, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 32.660373747646204, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": 0.5202032923698425, |
|
"logits/rejected": 0.5697475671768188, |
|
"logps/chosen": -439.6302185058594, |
|
"logps/rejected": -633.529296875, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7451140880584717, |
|
"rewards/margins": 1.8138139247894287, |
|
"rewards/rejected": -3.5589280128479004, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 51.57954812756242, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": 0.49899429082870483, |
|
"logits/rejected": 0.5181163549423218, |
|
"logps/chosen": -429.69696044921875, |
|
"logps/rejected": -546.2327270507812, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4596073627471924, |
|
"rewards/margins": 1.2351174354553223, |
|
"rewards/rejected": -2.6947247982025146, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 37.743557346368355, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": 0.4497598111629486, |
|
"logits/rejected": 0.4462924003601074, |
|
"logps/chosen": -512.8709106445312, |
|
"logps/rejected": -644.2141723632812, |
|
"loss": 0.4828, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.907044768333435, |
|
"rewards/margins": 1.6575613021850586, |
|
"rewards/rejected": -3.5646064281463623, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 38.732238817533585, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": 0.6549265384674072, |
|
"logits/rejected": 0.6424221396446228, |
|
"logps/chosen": -469.24627685546875, |
|
"logps/rejected": -607.3594970703125, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6534907817840576, |
|
"rewards/margins": 1.179262399673462, |
|
"rewards/rejected": -2.8327534198760986, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 32.977127485837634, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": 0.22545738518238068, |
|
"logits/rejected": 0.31531310081481934, |
|
"logps/chosen": -394.68255615234375, |
|
"logps/rejected": -499.6689453125, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.201900839805603, |
|
"rewards/margins": 0.9213203191757202, |
|
"rewards/rejected": -2.123220920562744, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 31.676779604073975, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": 0.5821847915649414, |
|
"logits/rejected": 0.6351027488708496, |
|
"logps/chosen": -432.86981201171875, |
|
"logps/rejected": -522.4436645507812, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3247367143630981, |
|
"rewards/margins": 0.9480254054069519, |
|
"rewards/rejected": -2.2727620601654053, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 35.00486375316812, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": 0.4525614380836487, |
|
"logits/rejected": 0.4556906819343567, |
|
"logps/chosen": -410.01922607421875, |
|
"logps/rejected": -492.8228454589844, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4033153057098389, |
|
"rewards/margins": 0.8626031875610352, |
|
"rewards/rejected": -2.265918493270874, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 30.270031114348164, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": 0.6305533647537231, |
|
"logits/rejected": 0.6111758351325989, |
|
"logps/chosen": -405.37762451171875, |
|
"logps/rejected": -599.3681640625, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.399217963218689, |
|
"rewards/margins": 1.6433988809585571, |
|
"rewards/rejected": -3.042617082595825, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 25.427591996310284, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": 0.5907707810401917, |
|
"logits/rejected": 0.6095005869865417, |
|
"logps/chosen": -420.5110778808594, |
|
"logps/rejected": -531.2003784179688, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4939762353897095, |
|
"rewards/margins": 1.0677590370178223, |
|
"rewards/rejected": -2.561735153198242, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 29.10370817287732, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": 0.5958229303359985, |
|
"logits/rejected": 0.5724313855171204, |
|
"logps/chosen": -438.81842041015625, |
|
"logps/rejected": -509.04364013671875, |
|
"loss": 0.4796, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.548699975013733, |
|
"rewards/margins": 1.0004830360412598, |
|
"rewards/rejected": -2.5491831302642822, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 29.350472811549906, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": 0.5696316361427307, |
|
"logits/rejected": 0.5993034243583679, |
|
"logps/chosen": -406.0208435058594, |
|
"logps/rejected": -559.4927978515625, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5112321376800537, |
|
"rewards/margins": 1.6407535076141357, |
|
"rewards/rejected": -3.1519858837127686, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 30.396592351915984, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": 0.4575433135032654, |
|
"logits/rejected": 0.5362235307693481, |
|
"logps/chosen": -446.55767822265625, |
|
"logps/rejected": -541.7037963867188, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6036920547485352, |
|
"rewards/margins": 0.8041864633560181, |
|
"rewards/rejected": -2.4078783988952637, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 34.376914467887886, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": 0.6164439916610718, |
|
"logits/rejected": 0.5993391871452332, |
|
"logps/chosen": -476.3313903808594, |
|
"logps/rejected": -687.8834228515625, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.068695545196533, |
|
"rewards/margins": 1.9827091693878174, |
|
"rewards/rejected": -4.0514044761657715, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 32.201249898617604, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": 0.6297436952590942, |
|
"logits/rejected": 0.6931129693984985, |
|
"logps/chosen": -443.54193115234375, |
|
"logps/rejected": -651.564453125, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6411831378936768, |
|
"rewards/margins": 2.111694812774658, |
|
"rewards/rejected": -3.752877712249756, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 33.481424863253956, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": 0.581652820110321, |
|
"logits/rejected": 0.6015012860298157, |
|
"logps/chosen": -468.0342712402344, |
|
"logps/rejected": -665.8737182617188, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9511600732803345, |
|
"rewards/margins": 2.2348785400390625, |
|
"rewards/rejected": -4.186038017272949, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 30.668163376222036, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": 0.5464416742324829, |
|
"logits/rejected": 0.5375434160232544, |
|
"logps/chosen": -479.27197265625, |
|
"logps/rejected": -527.6573486328125, |
|
"loss": 0.5522, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6696078777313232, |
|
"rewards/margins": 0.8726032972335815, |
|
"rewards/rejected": -2.5422110557556152, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 50.75005189933063, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": 0.6589860320091248, |
|
"logits/rejected": 0.6931478977203369, |
|
"logps/chosen": -399.00360107421875, |
|
"logps/rejected": -464.0816955566406, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2213267087936401, |
|
"rewards/margins": 0.4684023857116699, |
|
"rewards/rejected": -1.6897293329238892, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 41.032076102256525, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": 0.7128033638000488, |
|
"logits/rejected": 0.7279654741287231, |
|
"logps/chosen": -364.8663024902344, |
|
"logps/rejected": -559.3302001953125, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1767915487289429, |
|
"rewards/margins": 1.811647653579712, |
|
"rewards/rejected": -2.9884393215179443, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 53.34540663235414, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": 0.649802565574646, |
|
"logits/rejected": 0.7241895198822021, |
|
"logps/chosen": -443.50811767578125, |
|
"logps/rejected": -610.3703002929688, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4965871572494507, |
|
"rewards/margins": 1.7070690393447876, |
|
"rewards/rejected": -3.203655958175659, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 25.357687908984758, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": 0.630439043045044, |
|
"logits/rejected": 0.6549093127250671, |
|
"logps/chosen": -375.9720153808594, |
|
"logps/rejected": -464.39239501953125, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.233820915222168, |
|
"rewards/margins": 0.9906282424926758, |
|
"rewards/rejected": -2.2244491577148438, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 47.499335691257166, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": 0.5655387043952942, |
|
"logits/rejected": 0.6218944191932678, |
|
"logps/chosen": -422.29351806640625, |
|
"logps/rejected": -549.7659301757812, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5186660289764404, |
|
"rewards/margins": 1.3507541418075562, |
|
"rewards/rejected": -2.869419813156128, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 27.17413580646397, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": 0.6208091974258423, |
|
"logits/rejected": 0.6463747024536133, |
|
"logps/chosen": -433.322265625, |
|
"logps/rejected": -678.259521484375, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6338462829589844, |
|
"rewards/margins": 2.446549654006958, |
|
"rewards/rejected": -4.080395698547363, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 28.316091683395783, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": 0.5385035276412964, |
|
"logits/rejected": 0.5358555316925049, |
|
"logps/chosen": -412.51226806640625, |
|
"logps/rejected": -599.2774658203125, |
|
"loss": 0.5164, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8855476379394531, |
|
"rewards/margins": 1.7057392597198486, |
|
"rewards/rejected": -3.591287136077881, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 34.541036258138135, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": 0.518667459487915, |
|
"logits/rejected": 0.5254421830177307, |
|
"logps/chosen": -403.07269287109375, |
|
"logps/rejected": -506.0286560058594, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5274192094802856, |
|
"rewards/margins": 1.0553488731384277, |
|
"rewards/rejected": -2.582767963409424, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 28.277531402961085, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": 0.5363879203796387, |
|
"logits/rejected": 0.5436598062515259, |
|
"logps/chosen": -434.55340576171875, |
|
"logps/rejected": -601.2906494140625, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9992926120758057, |
|
"rewards/margins": 1.2217543125152588, |
|
"rewards/rejected": -3.2210469245910645, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 28.240328527545266, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": 0.4857005178928375, |
|
"logits/rejected": 0.5038618445396423, |
|
"logps/chosen": -473.5176696777344, |
|
"logps/rejected": -521.3526611328125, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.535585880279541, |
|
"rewards/margins": 0.8739916086196899, |
|
"rewards/rejected": -2.4095776081085205, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 28.035767914492325, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": 0.46195000410079956, |
|
"logits/rejected": 0.4942271113395691, |
|
"logps/chosen": -370.6980895996094, |
|
"logps/rejected": -514.0704345703125, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2649797201156616, |
|
"rewards/margins": 0.9138463735580444, |
|
"rewards/rejected": -2.178826093673706, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 49.81514108152477, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": 0.6403766870498657, |
|
"logits/rejected": 0.6394528150558472, |
|
"logps/chosen": -436.5330505371094, |
|
"logps/rejected": -459.36480712890625, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.452787160873413, |
|
"rewards/margins": 0.7825342416763306, |
|
"rewards/rejected": -2.235321283340454, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 39.02908540341096, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": 0.647792398929596, |
|
"logits/rejected": 0.5832428932189941, |
|
"logps/chosen": -378.17205810546875, |
|
"logps/rejected": -390.7867431640625, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.234156608581543, |
|
"rewards/margins": 0.34215831756591797, |
|
"rewards/rejected": -1.5763150453567505, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 34.25301402942352, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": 0.5757008194923401, |
|
"logits/rejected": 0.6183763742446899, |
|
"logps/chosen": -355.47906494140625, |
|
"logps/rejected": -486.69232177734375, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4054075479507446, |
|
"rewards/margins": 1.2384564876556396, |
|
"rewards/rejected": -2.643864154815674, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 33.95997970816329, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": 0.5819541811943054, |
|
"logits/rejected": 0.6103062033653259, |
|
"logps/chosen": -490.56640625, |
|
"logps/rejected": -584.3535766601562, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.5636104345321655, |
|
"rewards/margins": 1.433632254600525, |
|
"rewards/rejected": -2.9972424507141113, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 33.319507197288296, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": 0.48387107253074646, |
|
"logits/rejected": 0.48154598474502563, |
|
"logps/chosen": -442.59228515625, |
|
"logps/rejected": -581.6680908203125, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3801770210266113, |
|
"rewards/margins": 1.2666184902191162, |
|
"rewards/rejected": -2.6467957496643066, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 31.154558612258484, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": 0.7043946385383606, |
|
"logits/rejected": 0.7090233564376831, |
|
"logps/chosen": -413.64569091796875, |
|
"logps/rejected": -415.6429138183594, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4569604396820068, |
|
"rewards/margins": 0.7636488676071167, |
|
"rewards/rejected": -2.220609188079834, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 27.53888081945469, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": 0.7477514147758484, |
|
"logits/rejected": 0.7476789951324463, |
|
"logps/chosen": -377.96209716796875, |
|
"logps/rejected": -513.9478759765625, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4383244514465332, |
|
"rewards/margins": 1.5858564376831055, |
|
"rewards/rejected": -3.0241806507110596, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 29.519016591031086, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": 0.6164499521255493, |
|
"logits/rejected": 0.6799559593200684, |
|
"logps/chosen": -361.77264404296875, |
|
"logps/rejected": -481.71002197265625, |
|
"loss": 0.479, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.447037935256958, |
|
"rewards/margins": 1.3069896697998047, |
|
"rewards/rejected": -2.7540273666381836, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 28.74044368904408, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": 0.551243782043457, |
|
"logits/rejected": 0.6056830286979675, |
|
"logps/chosen": -397.4892578125, |
|
"logps/rejected": -533.1715087890625, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3637256622314453, |
|
"rewards/margins": 1.1748710870742798, |
|
"rewards/rejected": -2.5385966300964355, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 39.23052270442962, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": 0.7540609240531921, |
|
"logits/rejected": 0.725174605846405, |
|
"logps/chosen": -453.46246337890625, |
|
"logps/rejected": -699.2947998046875, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.230548620223999, |
|
"rewards/margins": 2.478804588317871, |
|
"rewards/rejected": -4.709353446960449, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 35.02504460778454, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": 0.5849272012710571, |
|
"logits/rejected": 0.5850101113319397, |
|
"logps/chosen": -398.72564697265625, |
|
"logps/rejected": -597.7645874023438, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6642696857452393, |
|
"rewards/margins": 1.7866287231445312, |
|
"rewards/rejected": -3.4508986473083496, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 34.70931185312274, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": 0.6965154409408569, |
|
"logits/rejected": 0.6873563528060913, |
|
"logps/chosen": -419.25323486328125, |
|
"logps/rejected": -576.9484252929688, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7598737478256226, |
|
"rewards/margins": 1.6468613147735596, |
|
"rewards/rejected": -3.4067349433898926, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 28.419435291801697, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": 0.6497688293457031, |
|
"logits/rejected": 0.6952397227287292, |
|
"logps/chosen": -469.9932556152344, |
|
"logps/rejected": -512.4960327148438, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6104280948638916, |
|
"rewards/margins": 0.7736623883247375, |
|
"rewards/rejected": -2.3840904235839844, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 33.62103705328426, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": 0.7049362659454346, |
|
"logits/rejected": 0.7318881750106812, |
|
"logps/chosen": -400.5235900878906, |
|
"logps/rejected": -579.788330078125, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4686295986175537, |
|
"rewards/margins": 1.3642832040786743, |
|
"rewards/rejected": -2.8329129219055176, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 37.1550051643393, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": 0.6910517811775208, |
|
"logits/rejected": 0.7440094947814941, |
|
"logps/chosen": -433.5245666503906, |
|
"logps/rejected": -632.4937133789062, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9925916194915771, |
|
"rewards/margins": 1.4477274417877197, |
|
"rewards/rejected": -3.440319061279297, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 32.702979787660254, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": 0.5501781702041626, |
|
"logits/rejected": 0.6104339361190796, |
|
"logps/chosen": -356.7936096191406, |
|
"logps/rejected": -473.5712890625, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0627294778823853, |
|
"rewards/margins": 1.095290184020996, |
|
"rewards/rejected": -2.158019542694092, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 37.57322885787863, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": 0.6268981695175171, |
|
"logits/rejected": 0.6416260600090027, |
|
"logps/chosen": -450.3939514160156, |
|
"logps/rejected": -575.8052978515625, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5946685075759888, |
|
"rewards/margins": 1.3188021183013916, |
|
"rewards/rejected": -2.91347074508667, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 39.256486442369265, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": 0.6842622756958008, |
|
"logits/rejected": 0.6938222646713257, |
|
"logps/chosen": -393.2452697753906, |
|
"logps/rejected": -461.9776916503906, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3430721759796143, |
|
"rewards/margins": 1.0853911638259888, |
|
"rewards/rejected": -2.4284629821777344, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 30.68214318996349, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": 0.675729513168335, |
|
"logits/rejected": 0.7008381485939026, |
|
"logps/chosen": -472.68341064453125, |
|
"logps/rejected": -557.2034912109375, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8632663488388062, |
|
"rewards/margins": 1.3563127517700195, |
|
"rewards/rejected": -3.219578981399536, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 31.3311200225744, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": 0.6629911661148071, |
|
"logits/rejected": 0.7233511805534363, |
|
"logps/chosen": -439.72784423828125, |
|
"logps/rejected": -584.7631225585938, |
|
"loss": 0.528, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0253746509552, |
|
"rewards/margins": 1.3782660961151123, |
|
"rewards/rejected": -3.4036407470703125, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 35.26584973890508, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": 0.6147924661636353, |
|
"logits/rejected": 0.6977729797363281, |
|
"logps/chosen": -460.37890625, |
|
"logps/rejected": -651.292724609375, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7438169717788696, |
|
"rewards/margins": 2.0838496685028076, |
|
"rewards/rejected": -3.827666759490967, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 34.233093320124546, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": 0.6681974530220032, |
|
"logits/rejected": 0.6297106742858887, |
|
"logps/chosen": -375.5139465332031, |
|
"logps/rejected": -458.5330505371094, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3370566368103027, |
|
"rewards/margins": 1.3033640384674072, |
|
"rewards/rejected": -2.640420913696289, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 28.920920445134406, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": 0.7325125932693481, |
|
"logits/rejected": 0.7009168863296509, |
|
"logps/chosen": -475.2723083496094, |
|
"logps/rejected": -622.3833618164062, |
|
"loss": 0.5236, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.810741662979126, |
|
"rewards/margins": 1.578853726387024, |
|
"rewards/rejected": -3.3895955085754395, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 30.861207689091124, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": 0.7458786964416504, |
|
"logits/rejected": 0.7599055171012878, |
|
"logps/chosen": -476.2272033691406, |
|
"logps/rejected": -782.8841552734375, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.934945821762085, |
|
"rewards/margins": 3.0150437355041504, |
|
"rewards/rejected": -4.9499897956848145, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 31.420286094143293, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": 0.6218976378440857, |
|
"logits/rejected": 0.6657293438911438, |
|
"logps/chosen": -420.7046813964844, |
|
"logps/rejected": -610.647705078125, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6917369365692139, |
|
"rewards/margins": 1.8296884298324585, |
|
"rewards/rejected": -3.521425724029541, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 30.509222434763775, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": 0.6954709887504578, |
|
"logits/rejected": 0.777031421661377, |
|
"logps/chosen": -401.15545654296875, |
|
"logps/rejected": -490.4656677246094, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3204840421676636, |
|
"rewards/margins": 1.0271937847137451, |
|
"rewards/rejected": -2.3476779460906982, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 40.99082438692143, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": 0.6641609072685242, |
|
"logits/rejected": 0.7061329483985901, |
|
"logps/chosen": -401.11053466796875, |
|
"logps/rejected": -666.2782592773438, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5550295114517212, |
|
"rewards/margins": 1.9435818195343018, |
|
"rewards/rejected": -3.4986109733581543, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 23.80753562651165, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": 0.7196059226989746, |
|
"logits/rejected": 0.7276310920715332, |
|
"logps/chosen": -464.3755798339844, |
|
"logps/rejected": -605.0327758789062, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.446529507637024, |
|
"rewards/margins": 1.4860612154006958, |
|
"rewards/rejected": -2.932590961456299, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 35.50656541014287, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": 0.7047610282897949, |
|
"logits/rejected": 0.7627818584442139, |
|
"logps/chosen": -335.9312744140625, |
|
"logps/rejected": -600.0538330078125, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3366457223892212, |
|
"rewards/margins": 2.3122611045837402, |
|
"rewards/rejected": -3.648906707763672, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 31.93713658952523, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": 0.5606223940849304, |
|
"logits/rejected": 0.6245654821395874, |
|
"logps/chosen": -408.01715087890625, |
|
"logps/rejected": -522.1866455078125, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.572877049446106, |
|
"rewards/margins": 1.2301509380340576, |
|
"rewards/rejected": -2.803027629852295, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 27.914653903784515, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": 0.7705515623092651, |
|
"logits/rejected": 0.7918896675109863, |
|
"logps/chosen": -443.3219299316406, |
|
"logps/rejected": -657.1170654296875, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.029221773147583, |
|
"rewards/margins": 2.0203018188476562, |
|
"rewards/rejected": -4.04952335357666, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 24.442785917599732, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": 0.7035326957702637, |
|
"logits/rejected": 0.721504807472229, |
|
"logps/chosen": -443.2125549316406, |
|
"logps/rejected": -473.4432678222656, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4505491256713867, |
|
"rewards/margins": 0.755431056022644, |
|
"rewards/rejected": -2.205980062484741, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 31.963860621592772, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": 0.7350751161575317, |
|
"logits/rejected": 0.7096099853515625, |
|
"logps/chosen": -475.07305908203125, |
|
"logps/rejected": -598.759521484375, |
|
"loss": 0.4743, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5520153045654297, |
|
"rewards/margins": 1.6064621210098267, |
|
"rewards/rejected": -3.158477306365967, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 31.899547328781036, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": 0.6485757827758789, |
|
"logits/rejected": 0.7398275136947632, |
|
"logps/chosen": -451.94580078125, |
|
"logps/rejected": -686.9503173828125, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.814279556274414, |
|
"rewards/margins": 2.0318989753723145, |
|
"rewards/rejected": -3.8461780548095703, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 26.178708516639514, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": 0.6045451760292053, |
|
"logits/rejected": 0.6039005517959595, |
|
"logps/chosen": -454.6619567871094, |
|
"logps/rejected": -534.8763427734375, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7492996454238892, |
|
"rewards/margins": 1.4018948078155518, |
|
"rewards/rejected": -3.1511945724487305, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 35.91366947948363, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": 0.5839170217514038, |
|
"logits/rejected": 0.6014143228530884, |
|
"logps/chosen": -384.09564208984375, |
|
"logps/rejected": -648.3985595703125, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4092109203338623, |
|
"rewards/margins": 2.2909350395202637, |
|
"rewards/rejected": -3.700145721435547, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 35.647393846898865, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": 0.6380244493484497, |
|
"logits/rejected": 0.6699076294898987, |
|
"logps/chosen": -416.11297607421875, |
|
"logps/rejected": -456.918212890625, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3569297790527344, |
|
"rewards/margins": 0.7398675084114075, |
|
"rewards/rejected": -2.096797227859497, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 21.784137715119133, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": 0.6910983920097351, |
|
"logits/rejected": 0.6865051984786987, |
|
"logps/chosen": -429.507080078125, |
|
"logps/rejected": -623.1798706054688, |
|
"loss": 0.454, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3549401760101318, |
|
"rewards/margins": 1.7029485702514648, |
|
"rewards/rejected": -3.0578887462615967, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 40.680020887507624, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": 0.6894032955169678, |
|
"logits/rejected": 0.6674355268478394, |
|
"logps/chosen": -420.45233154296875, |
|
"logps/rejected": -537.4315185546875, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3876147270202637, |
|
"rewards/margins": 1.1851856708526611, |
|
"rewards/rejected": -2.572800397872925, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 35.99817855160938, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": 0.6698207855224609, |
|
"logits/rejected": 0.7016371488571167, |
|
"logps/chosen": -433.239501953125, |
|
"logps/rejected": -564.9044799804688, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.443068504333496, |
|
"rewards/margins": 1.2255724668502808, |
|
"rewards/rejected": -2.6686408519744873, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 39.75965895420522, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": 0.6557576656341553, |
|
"logits/rejected": 0.6801853775978088, |
|
"logps/chosen": -389.7122497558594, |
|
"logps/rejected": -529.9295654296875, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1586706638336182, |
|
"rewards/margins": 1.4817416667938232, |
|
"rewards/rejected": -2.6404123306274414, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 27.544765289204754, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": 0.6880534291267395, |
|
"logits/rejected": 0.6456581950187683, |
|
"logps/chosen": -367.79229736328125, |
|
"logps/rejected": -525.629638671875, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4672930240631104, |
|
"rewards/margins": 1.4001801013946533, |
|
"rewards/rejected": -2.8674731254577637, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 24.452787144356392, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": 0.5893281698226929, |
|
"logits/rejected": 0.6441556215286255, |
|
"logps/chosen": -364.4541931152344, |
|
"logps/rejected": -486.20703125, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0919770002365112, |
|
"rewards/margins": 1.092616319656372, |
|
"rewards/rejected": -2.184593439102173, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 44.42792750388695, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": 0.7141789197921753, |
|
"logits/rejected": 0.7477242946624756, |
|
"logps/chosen": -495.19659423828125, |
|
"logps/rejected": -557.9563598632812, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5828977823257446, |
|
"rewards/margins": 1.3042711019515991, |
|
"rewards/rejected": -2.8871684074401855, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 35.45188058330296, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": 0.7555517554283142, |
|
"logits/rejected": 0.8401724100112915, |
|
"logps/chosen": -423.3294372558594, |
|
"logps/rejected": -492.0943908691406, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2794373035430908, |
|
"rewards/margins": 0.7932703495025635, |
|
"rewards/rejected": -2.0727076530456543, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 31.904169316096304, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": 0.6724790930747986, |
|
"logits/rejected": 0.7021657824516296, |
|
"logps/chosen": -463.25274658203125, |
|
"logps/rejected": -532.02294921875, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7021219730377197, |
|
"rewards/margins": 1.30453622341156, |
|
"rewards/rejected": -3.0066585540771484, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 43.15929522651468, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": 0.6783190369606018, |
|
"logits/rejected": 0.7536433935165405, |
|
"logps/chosen": -446.2560119628906, |
|
"logps/rejected": -520.4361572265625, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.470943570137024, |
|
"rewards/margins": 1.3352140188217163, |
|
"rewards/rejected": -2.8061575889587402, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 55.17192263922261, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": 0.6258468627929688, |
|
"logits/rejected": 0.6363823413848877, |
|
"logps/chosen": -445.82501220703125, |
|
"logps/rejected": -570.8683471679688, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.644078016281128, |
|
"rewards/margins": 1.5276083946228027, |
|
"rewards/rejected": -3.1716864109039307, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 37.27131536327267, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": 0.44763240218162537, |
|
"logits/rejected": 0.46375036239624023, |
|
"logps/chosen": -417.30914306640625, |
|
"logps/rejected": -528.802734375, |
|
"loss": 0.5146, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2662084102630615, |
|
"rewards/margins": 1.2129180431365967, |
|
"rewards/rejected": -2.479126214981079, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 34.36417791117091, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": 0.5888969898223877, |
|
"logits/rejected": 0.6028741002082825, |
|
"logps/chosen": -482.534912109375, |
|
"logps/rejected": -555.63330078125, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6587140560150146, |
|
"rewards/margins": 0.8636137843132019, |
|
"rewards/rejected": -2.5223278999328613, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 29.135677726916754, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": 0.7223433256149292, |
|
"logits/rejected": 0.7450364232063293, |
|
"logps/chosen": -455.407470703125, |
|
"logps/rejected": -618.5993041992188, |
|
"loss": 0.513, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9469377994537354, |
|
"rewards/margins": 1.590118646621704, |
|
"rewards/rejected": -3.5370564460754395, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 28.327194690213325, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": 0.6537632942199707, |
|
"logits/rejected": 0.7060953378677368, |
|
"logps/chosen": -362.89678955078125, |
|
"logps/rejected": -533.9738159179688, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3523266315460205, |
|
"rewards/margins": 1.3014628887176514, |
|
"rewards/rejected": -2.653789520263672, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 30.50433526464709, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": 0.67864990234375, |
|
"logits/rejected": 0.6370582580566406, |
|
"logps/chosen": -456.47265625, |
|
"logps/rejected": -558.3876953125, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5260423421859741, |
|
"rewards/margins": 1.3706743717193604, |
|
"rewards/rejected": -2.896716594696045, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 34.85452937307168, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": 0.6534871459007263, |
|
"logits/rejected": 0.6843494772911072, |
|
"logps/chosen": -505.02880859375, |
|
"logps/rejected": -617.3344116210938, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8383209705352783, |
|
"rewards/margins": 1.1424758434295654, |
|
"rewards/rejected": -2.9807963371276855, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 27.148350418756195, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": 0.5433071851730347, |
|
"logits/rejected": 0.562809944152832, |
|
"logps/chosen": -321.7220153808594, |
|
"logps/rejected": -465.24078369140625, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1553895473480225, |
|
"rewards/margins": 1.265106439590454, |
|
"rewards/rejected": -2.4204962253570557, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 46.622796595249234, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": 0.746674656867981, |
|
"logits/rejected": 0.7814425230026245, |
|
"logps/chosen": -480.2022399902344, |
|
"logps/rejected": -509.8692321777344, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.7685315608978271, |
|
"rewards/margins": 0.6169921159744263, |
|
"rewards/rejected": -2.385523557662964, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 30.590228339686885, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": 0.5543047189712524, |
|
"logits/rejected": 0.5367203950881958, |
|
"logps/chosen": -393.0322265625, |
|
"logps/rejected": -499.9285583496094, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2676035165786743, |
|
"rewards/margins": 1.047071099281311, |
|
"rewards/rejected": -2.3146743774414062, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 32.70639486712309, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": 0.7046272158622742, |
|
"logits/rejected": 0.7386664152145386, |
|
"logps/chosen": -361.9070739746094, |
|
"logps/rejected": -509.32611083984375, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3692398071289062, |
|
"rewards/margins": 1.468427300453186, |
|
"rewards/rejected": -2.837667226791382, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 30.843888239231276, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": 0.7129692435264587, |
|
"logits/rejected": 0.7324257493019104, |
|
"logps/chosen": -339.58807373046875, |
|
"logps/rejected": -462.89324951171875, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.1596901416778564, |
|
"rewards/margins": 1.1927125453948975, |
|
"rewards/rejected": -2.352402448654175, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 28.751873470088746, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": 0.5853630304336548, |
|
"logits/rejected": 0.6571609377861023, |
|
"logps/chosen": -337.8079528808594, |
|
"logps/rejected": -537.3619995117188, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4434888362884521, |
|
"rewards/margins": 1.6413323879241943, |
|
"rewards/rejected": -3.0848214626312256, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 38.705907816215095, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": 0.5667808055877686, |
|
"logits/rejected": 0.5656407475471497, |
|
"logps/chosen": -399.50091552734375, |
|
"logps/rejected": -487.841796875, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2812906503677368, |
|
"rewards/margins": 1.0314573049545288, |
|
"rewards/rejected": -2.3127479553222656, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 29.141993488554476, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": 0.5703980326652527, |
|
"logits/rejected": 0.6396486163139343, |
|
"logps/chosen": -431.0738220214844, |
|
"logps/rejected": -601.056884765625, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4801702499389648, |
|
"rewards/margins": 1.805931806564331, |
|
"rewards/rejected": -3.286102294921875, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 29.731523931470324, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": 0.6887638568878174, |
|
"logits/rejected": 0.6721312403678894, |
|
"logps/chosen": -482.31561279296875, |
|
"logps/rejected": -583.8924560546875, |
|
"loss": 0.5162, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5862760543823242, |
|
"rewards/margins": 1.089349389076233, |
|
"rewards/rejected": -2.6756255626678467, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 28.832468635301804, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": 0.7398797273635864, |
|
"logits/rejected": 0.7511605024337769, |
|
"logps/chosen": -458.5384826660156, |
|
"logps/rejected": -593.3927612304688, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4789142608642578, |
|
"rewards/margins": 1.1796139478683472, |
|
"rewards/rejected": -2.6585280895233154, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 37.91461771248107, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": 0.6813124418258667, |
|
"logits/rejected": 0.7123424410820007, |
|
"logps/chosen": -483.406005859375, |
|
"logps/rejected": -592.3176879882812, |
|
"loss": 0.5203, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7192113399505615, |
|
"rewards/margins": 1.3098422288894653, |
|
"rewards/rejected": -3.0290539264678955, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 30.53434537820314, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": 0.7137466669082642, |
|
"logits/rejected": 0.6785651445388794, |
|
"logps/chosen": -489.4556579589844, |
|
"logps/rejected": -642.427490234375, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4436726570129395, |
|
"rewards/margins": 1.7120336294174194, |
|
"rewards/rejected": -3.1557064056396484, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 46.86299931832656, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": 0.6887821555137634, |
|
"logits/rejected": 0.7490508556365967, |
|
"logps/chosen": -431.67022705078125, |
|
"logps/rejected": -574.1552734375, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5646417140960693, |
|
"rewards/margins": 1.6732689142227173, |
|
"rewards/rejected": -3.237910747528076, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 28.83675788521102, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": 0.6017109751701355, |
|
"logits/rejected": 0.5938320755958557, |
|
"logps/chosen": -454.86676025390625, |
|
"logps/rejected": -602.7736206054688, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.750307321548462, |
|
"rewards/margins": 1.2305312156677246, |
|
"rewards/rejected": -2.9808382987976074, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 39.34411253851995, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": 0.6451660394668579, |
|
"logits/rejected": 0.6789873838424683, |
|
"logps/chosen": -441.163330078125, |
|
"logps/rejected": -547.3287353515625, |
|
"loss": 0.4773, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3598965406417847, |
|
"rewards/margins": 1.3745959997177124, |
|
"rewards/rejected": -2.734492540359497, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 33.80213582312191, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": 0.6281944513320923, |
|
"logits/rejected": 0.6428082585334778, |
|
"logps/chosen": -436.121826171875, |
|
"logps/rejected": -549.109619140625, |
|
"loss": 0.4539, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5763047933578491, |
|
"rewards/margins": 1.3722374439239502, |
|
"rewards/rejected": -2.9485421180725098, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 33.8504088811558, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": 0.6952093839645386, |
|
"logits/rejected": 0.7127727270126343, |
|
"logps/chosen": -426.577880859375, |
|
"logps/rejected": -585.4749145507812, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.641758680343628, |
|
"rewards/margins": 1.7743602991104126, |
|
"rewards/rejected": -3.41611909866333, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 36.51412890934737, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": 0.7390758395195007, |
|
"logits/rejected": 0.7217782139778137, |
|
"logps/chosen": -395.6050720214844, |
|
"logps/rejected": -596.3590087890625, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6227900981903076, |
|
"rewards/margins": 2.1645493507385254, |
|
"rewards/rejected": -3.787339448928833, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 30.15398627394634, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": 0.6722254753112793, |
|
"logits/rejected": 0.7073189616203308, |
|
"logps/chosen": -420.848876953125, |
|
"logps/rejected": -572.2374877929688, |
|
"loss": 0.5061, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.448339819908142, |
|
"rewards/margins": 1.4826971292495728, |
|
"rewards/rejected": -2.931036949157715, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 33.15513048467062, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": 0.8221071362495422, |
|
"logits/rejected": 0.8116429448127747, |
|
"logps/chosen": -450.3140563964844, |
|
"logps/rejected": -565.424072265625, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6256906986236572, |
|
"rewards/margins": 1.3659015893936157, |
|
"rewards/rejected": -2.9915921688079834, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 28.899348484638733, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": 0.7205225825309753, |
|
"logits/rejected": 0.6872090101242065, |
|
"logps/chosen": -433.659912109375, |
|
"logps/rejected": -501.6417541503906, |
|
"loss": 0.5089, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4491467475891113, |
|
"rewards/margins": 1.1040198802947998, |
|
"rewards/rejected": -2.553166389465332, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 31.437945405932854, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": 0.7566556930541992, |
|
"logits/rejected": 0.7423285245895386, |
|
"logps/chosen": -362.8130798339844, |
|
"logps/rejected": -492.66058349609375, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.347149133682251, |
|
"rewards/margins": 0.9845093488693237, |
|
"rewards/rejected": -2.3316586017608643, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 28.49317814535035, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": 0.7042705416679382, |
|
"logits/rejected": 0.6866374611854553, |
|
"logps/chosen": -441.2322692871094, |
|
"logps/rejected": -548.809814453125, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3135347366333008, |
|
"rewards/margins": 1.3202944993972778, |
|
"rewards/rejected": -2.6338295936584473, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 44.16067627194287, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": 0.6853736042976379, |
|
"logits/rejected": 0.7283433675765991, |
|
"logps/chosen": -435.65362548828125, |
|
"logps/rejected": -469.2335510253906, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3361632823944092, |
|
"rewards/margins": 0.6489462852478027, |
|
"rewards/rejected": -1.9851096868515015, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 39.47362796340898, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": 0.7125884294509888, |
|
"logits/rejected": 0.7073049545288086, |
|
"logps/chosen": -366.3876953125, |
|
"logps/rejected": -517.1468505859375, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4312505722045898, |
|
"rewards/margins": 1.1074860095977783, |
|
"rewards/rejected": -2.538736343383789, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 44.465462304398336, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": 0.6530724167823792, |
|
"logits/rejected": 0.6677568554878235, |
|
"logps/chosen": -544.2484130859375, |
|
"logps/rejected": -627.7911376953125, |
|
"loss": 0.5358, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0512328147888184, |
|
"rewards/margins": 1.2890361547470093, |
|
"rewards/rejected": -3.340269088745117, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 41.65465122772129, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": 0.6696042418479919, |
|
"logits/rejected": 0.6906247138977051, |
|
"logps/chosen": -351.27984619140625, |
|
"logps/rejected": -566.343017578125, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.212708830833435, |
|
"rewards/margins": 2.181748628616333, |
|
"rewards/rejected": -3.3944575786590576, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 37.16565124609395, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": 0.7126466035842896, |
|
"logits/rejected": 0.6710564494132996, |
|
"logps/chosen": -423.04034423828125, |
|
"logps/rejected": -557.0213623046875, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7721456289291382, |
|
"rewards/margins": 1.449415922164917, |
|
"rewards/rejected": -3.2215614318847656, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 23.599726604923614, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": 0.7398973703384399, |
|
"logits/rejected": 0.751029372215271, |
|
"logps/chosen": -400.4888000488281, |
|
"logps/rejected": -586.410400390625, |
|
"loss": 0.4405, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4728138446807861, |
|
"rewards/margins": 1.921562910079956, |
|
"rewards/rejected": -3.394376754760742, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 47.47497023724174, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": 0.7788910269737244, |
|
"logits/rejected": 0.8002176284790039, |
|
"logps/chosen": -407.0137634277344, |
|
"logps/rejected": -546.9268798828125, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.223954439163208, |
|
"rewards/margins": 1.2972733974456787, |
|
"rewards/rejected": -2.5212275981903076, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 54.01121345632537, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": 0.6500368714332581, |
|
"logits/rejected": 0.7030578851699829, |
|
"logps/chosen": -501.3116149902344, |
|
"logps/rejected": -648.521728515625, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.90944504737854, |
|
"rewards/margins": 1.923628568649292, |
|
"rewards/rejected": -3.833073377609253, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 36.20984331034919, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": 0.7043317556381226, |
|
"logits/rejected": 0.7164947986602783, |
|
"logps/chosen": -452.80145263671875, |
|
"logps/rejected": -507.310791015625, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5479075908660889, |
|
"rewards/margins": 1.1401413679122925, |
|
"rewards/rejected": -2.688048839569092, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 32.72403763285082, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": 0.7559349536895752, |
|
"logits/rejected": 0.7914206385612488, |
|
"logps/chosen": -390.57232666015625, |
|
"logps/rejected": -523.0045776367188, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.420803427696228, |
|
"rewards/margins": 1.2412652969360352, |
|
"rewards/rejected": -2.6620688438415527, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 41.93143711138968, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": 0.7086448073387146, |
|
"logits/rejected": 0.6970892548561096, |
|
"logps/chosen": -408.8316345214844, |
|
"logps/rejected": -539.7716064453125, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.546519160270691, |
|
"rewards/margins": 1.1031622886657715, |
|
"rewards/rejected": -2.649681568145752, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 43.87698418658202, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": 0.678581714630127, |
|
"logits/rejected": 0.6445807814598083, |
|
"logps/chosen": -392.3583984375, |
|
"logps/rejected": -468.27911376953125, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.335402488708496, |
|
"rewards/margins": 0.9563083648681641, |
|
"rewards/rejected": -2.29171085357666, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 28.844880569875237, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": 0.6901575326919556, |
|
"logits/rejected": 0.7580747008323669, |
|
"logps/chosen": -356.3608093261719, |
|
"logps/rejected": -538.3591918945312, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2393145561218262, |
|
"rewards/margins": 2.0787644386291504, |
|
"rewards/rejected": -3.3180789947509766, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 31.987174559648025, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": 0.6704432368278503, |
|
"logits/rejected": 0.8194705247879028, |
|
"logps/chosen": -471.6807556152344, |
|
"logps/rejected": -564.5304565429688, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6247665882110596, |
|
"rewards/margins": 1.192398190498352, |
|
"rewards/rejected": -2.817164659500122, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 33.470079538599556, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": 0.6915429830551147, |
|
"logits/rejected": 0.7422800660133362, |
|
"logps/chosen": -452.6827087402344, |
|
"logps/rejected": -586.0211181640625, |
|
"loss": 0.518, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7043472528457642, |
|
"rewards/margins": 1.16866934299469, |
|
"rewards/rejected": -2.873016357421875, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 28.19482569283347, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": 0.6776562929153442, |
|
"logits/rejected": 0.7009492516517639, |
|
"logps/chosen": -423.5204162597656, |
|
"logps/rejected": -529.0217895507812, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3196834325790405, |
|
"rewards/margins": 1.1787526607513428, |
|
"rewards/rejected": -2.4984359741210938, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 25.36749286718069, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": 0.7724124789237976, |
|
"logits/rejected": 0.7605770230293274, |
|
"logps/chosen": -458.8877868652344, |
|
"logps/rejected": -655.66552734375, |
|
"loss": 0.4933, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7062854766845703, |
|
"rewards/margins": 2.266158103942871, |
|
"rewards/rejected": -3.9724433422088623, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 31.238032068852384, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": 0.6670851707458496, |
|
"logits/rejected": 0.7134937047958374, |
|
"logps/chosen": -439.99688720703125, |
|
"logps/rejected": -536.1776123046875, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5214483737945557, |
|
"rewards/margins": 1.321410059928894, |
|
"rewards/rejected": -2.8428585529327393, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 33.34150808500615, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": 0.7619131803512573, |
|
"logits/rejected": 0.789661705493927, |
|
"logps/chosen": -435.0225524902344, |
|
"logps/rejected": -573.7952880859375, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7262579202651978, |
|
"rewards/margins": 1.4246705770492554, |
|
"rewards/rejected": -3.150928258895874, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 29.44124572086776, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": 0.657812774181366, |
|
"logits/rejected": 0.7025941014289856, |
|
"logps/chosen": -416.65948486328125, |
|
"logps/rejected": -534.0513305664062, |
|
"loss": 0.4573, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4878287315368652, |
|
"rewards/margins": 1.1736620664596558, |
|
"rewards/rejected": -2.6614909172058105, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 38.40279226701135, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": 0.7255043387413025, |
|
"logits/rejected": 0.7269566655158997, |
|
"logps/chosen": -447.64080810546875, |
|
"logps/rejected": -507.58966064453125, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7017924785614014, |
|
"rewards/margins": 0.7691752910614014, |
|
"rewards/rejected": -2.4709675312042236, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 39.684598268718624, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": 0.6789619326591492, |
|
"logits/rejected": 0.6977161169052124, |
|
"logps/chosen": -454.3805236816406, |
|
"logps/rejected": -490.30908203125, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4421659708023071, |
|
"rewards/margins": 1.0759384632110596, |
|
"rewards/rejected": -2.5181047916412354, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 34.83351031389863, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": 0.768031895160675, |
|
"logits/rejected": 0.7473145723342896, |
|
"logps/chosen": -501.7557678222656, |
|
"logps/rejected": -619.917236328125, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8463929891586304, |
|
"rewards/margins": 1.5131759643554688, |
|
"rewards/rejected": -3.3595688343048096, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 35.31190929686924, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": 0.6237305998802185, |
|
"logits/rejected": 0.6354110240936279, |
|
"logps/chosen": -510.52801513671875, |
|
"logps/rejected": -576.4329833984375, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5719668865203857, |
|
"rewards/margins": 1.4724681377410889, |
|
"rewards/rejected": -3.0444350242614746, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 41.916022771150374, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": 0.7030372619628906, |
|
"logits/rejected": 0.719199538230896, |
|
"logps/chosen": -357.8835144042969, |
|
"logps/rejected": -461.7693786621094, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2769114971160889, |
|
"rewards/margins": 1.2639166116714478, |
|
"rewards/rejected": -2.540827751159668, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 27.73526705705633, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": 0.7158384919166565, |
|
"logits/rejected": 0.7426877021789551, |
|
"logps/chosen": -401.23834228515625, |
|
"logps/rejected": -585.4378051757812, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5427526235580444, |
|
"rewards/margins": 1.556259274482727, |
|
"rewards/rejected": -3.0990118980407715, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 44.61122166912319, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": 0.6555548310279846, |
|
"logits/rejected": 0.6843029260635376, |
|
"logps/chosen": -425.64776611328125, |
|
"logps/rejected": -592.9639892578125, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.610778570175171, |
|
"rewards/margins": 1.449095606803894, |
|
"rewards/rejected": -3.0598740577697754, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 27.134645439655188, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": 0.650478184223175, |
|
"logits/rejected": 0.6665130257606506, |
|
"logps/chosen": -483.04779052734375, |
|
"logps/rejected": -646.669677734375, |
|
"loss": 0.4583, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.76322340965271, |
|
"rewards/margins": 1.3918665647506714, |
|
"rewards/rejected": -3.155089855194092, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 30.88613336840347, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": 0.6785479784011841, |
|
"logits/rejected": 0.7193585634231567, |
|
"logps/chosen": -427.58758544921875, |
|
"logps/rejected": -651.4105224609375, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9091869592666626, |
|
"rewards/margins": 2.4718122482299805, |
|
"rewards/rejected": -4.380999565124512, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 39.208213968014256, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": 0.6814306378364563, |
|
"logits/rejected": 0.7048656344413757, |
|
"logps/chosen": -378.8595886230469, |
|
"logps/rejected": -436.8194274902344, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.463390588760376, |
|
"rewards/margins": 0.6034170985221863, |
|
"rewards/rejected": -2.066807746887207, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 28.560854060893856, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": 0.6933716535568237, |
|
"logits/rejected": 0.6919404864311218, |
|
"logps/chosen": -597.2493896484375, |
|
"logps/rejected": -596.7833251953125, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.024073600769043, |
|
"rewards/margins": 0.7939941883087158, |
|
"rewards/rejected": -2.818067789077759, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 31.695811688746335, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": 0.6247652173042297, |
|
"logits/rejected": 0.6395012736320496, |
|
"logps/chosen": -460.6663513183594, |
|
"logps/rejected": -528.9644775390625, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.649450659751892, |
|
"rewards/margins": 1.0935943126678467, |
|
"rewards/rejected": -2.7430450916290283, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 31.093359172893916, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": 0.7312600612640381, |
|
"logits/rejected": 0.7648332715034485, |
|
"logps/chosen": -457.8077697753906, |
|
"logps/rejected": -568.5828247070312, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7661203145980835, |
|
"rewards/margins": 1.2773500680923462, |
|
"rewards/rejected": -3.0434701442718506, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 51.951004563036165, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": 0.5918694734573364, |
|
"logits/rejected": 0.6510140299797058, |
|
"logps/chosen": -518.0965576171875, |
|
"logps/rejected": -644.2404174804688, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8913600444793701, |
|
"rewards/margins": 1.7554857730865479, |
|
"rewards/rejected": -3.646846055984497, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 30.668402411962806, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": 0.6927034258842468, |
|
"logits/rejected": 0.7566369771957397, |
|
"logps/chosen": -466.992431640625, |
|
"logps/rejected": -573.04638671875, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7490732669830322, |
|
"rewards/margins": 1.085127353668213, |
|
"rewards/rejected": -2.834200382232666, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 33.39654631864919, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": 0.6728897094726562, |
|
"logits/rejected": 0.7135699987411499, |
|
"logps/chosen": -461.8128967285156, |
|
"logps/rejected": -448.58978271484375, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.641575813293457, |
|
"rewards/margins": 0.3381771445274353, |
|
"rewards/rejected": -1.9797531366348267, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 54.916707180210906, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": 0.7350330352783203, |
|
"logits/rejected": 0.7353192567825317, |
|
"logps/chosen": -449.59814453125, |
|
"logps/rejected": -616.7791137695312, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.633469820022583, |
|
"rewards/margins": 1.7337658405303955, |
|
"rewards/rejected": -3.3672356605529785, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 34.40985347014142, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": 0.70672607421875, |
|
"logits/rejected": 0.7129568457603455, |
|
"logps/chosen": -392.37969970703125, |
|
"logps/rejected": -576.6778564453125, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2961803674697876, |
|
"rewards/margins": 1.9318246841430664, |
|
"rewards/rejected": -3.2280051708221436, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 34.150066441594575, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": 0.7242936491966248, |
|
"logits/rejected": 0.7108465433120728, |
|
"logps/chosen": -474.98565673828125, |
|
"logps/rejected": -555.7901000976562, |
|
"loss": 0.5396, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.126871109008789, |
|
"rewards/margins": 0.8020073175430298, |
|
"rewards/rejected": -2.9288783073425293, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 34.55135904008657, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": 0.6907113194465637, |
|
"logits/rejected": 0.6848424673080444, |
|
"logps/chosen": -533.1072998046875, |
|
"logps/rejected": -573.22509765625, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.085261106491089, |
|
"rewards/margins": 1.0348024368286133, |
|
"rewards/rejected": -3.120063304901123, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 26.808282974911915, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": 0.6466808319091797, |
|
"logits/rejected": 0.6538549661636353, |
|
"logps/chosen": -456.07049560546875, |
|
"logps/rejected": -551.8894653320312, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.423441767692566, |
|
"rewards/margins": 1.1218823194503784, |
|
"rewards/rejected": -2.5453243255615234, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 28.83291099893329, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": 0.6636977195739746, |
|
"logits/rejected": 0.6604019999504089, |
|
"logps/chosen": -456.4715270996094, |
|
"logps/rejected": -504.9053649902344, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4351160526275635, |
|
"rewards/margins": 0.7614234685897827, |
|
"rewards/rejected": -2.1965396404266357, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 42.02738859118274, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": 0.6333275437355042, |
|
"logits/rejected": 0.6459077596664429, |
|
"logps/chosen": -352.7582092285156, |
|
"logps/rejected": -492.6253967285156, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1113379001617432, |
|
"rewards/margins": 1.3925281763076782, |
|
"rewards/rejected": -2.503865957260132, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 28.592815771886347, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": 0.8196449279785156, |
|
"logits/rejected": 0.7500802874565125, |
|
"logps/chosen": -360.6011657714844, |
|
"logps/rejected": -559.92919921875, |
|
"loss": 0.4666, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.395541787147522, |
|
"rewards/margins": 1.602349042892456, |
|
"rewards/rejected": -2.9978909492492676, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 52.0300622408455, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": 0.6689568758010864, |
|
"logits/rejected": 0.7228366136550903, |
|
"logps/chosen": -469.3619079589844, |
|
"logps/rejected": -608.2591552734375, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9589526653289795, |
|
"rewards/margins": 1.4972124099731445, |
|
"rewards/rejected": -3.456165313720703, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 44.27630823570909, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": 0.7316599488258362, |
|
"logits/rejected": 0.756661593914032, |
|
"logps/chosen": -514.4577026367188, |
|
"logps/rejected": -639.2863159179688, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.047656297683716, |
|
"rewards/margins": 1.3594030141830444, |
|
"rewards/rejected": -3.407059907913208, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 28.64271031691096, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": 0.6066162586212158, |
|
"logits/rejected": 0.6715503931045532, |
|
"logps/chosen": -382.43878173828125, |
|
"logps/rejected": -457.4537048339844, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4242990016937256, |
|
"rewards/margins": 0.8341078758239746, |
|
"rewards/rejected": -2.2584068775177, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 51.494044930017054, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": 0.7110476493835449, |
|
"logits/rejected": 0.6737565994262695, |
|
"logps/chosen": -428.01654052734375, |
|
"logps/rejected": -599.3654174804688, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.751559853553772, |
|
"rewards/margins": 1.7291587591171265, |
|
"rewards/rejected": -3.4807181358337402, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 32.8193357126353, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": 0.8528485298156738, |
|
"logits/rejected": 0.8270418047904968, |
|
"logps/chosen": -485.8948669433594, |
|
"logps/rejected": -512.58935546875, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5045112371444702, |
|
"rewards/margins": 0.845577597618103, |
|
"rewards/rejected": -2.3500890731811523, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 42.77177983927304, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": 0.6248888373374939, |
|
"logits/rejected": 0.6356110572814941, |
|
"logps/chosen": -448.6212463378906, |
|
"logps/rejected": -572.7357177734375, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.826633095741272, |
|
"rewards/margins": 0.8497768640518188, |
|
"rewards/rejected": -2.67641019821167, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 45.77767199385067, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": 0.7616924047470093, |
|
"logits/rejected": 0.782433271408081, |
|
"logps/chosen": -378.56072998046875, |
|
"logps/rejected": -471.501708984375, |
|
"loss": 0.4733, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4830647706985474, |
|
"rewards/margins": 0.9338682293891907, |
|
"rewards/rejected": -2.416933059692383, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 38.72474554060603, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": 0.6768021583557129, |
|
"logits/rejected": 0.6709715723991394, |
|
"logps/chosen": -418.63922119140625, |
|
"logps/rejected": -630.6937255859375, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.439439296722412, |
|
"rewards/margins": 2.4228386878967285, |
|
"rewards/rejected": -3.8622779846191406, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 57.14033737148358, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": 0.6473024487495422, |
|
"logits/rejected": 0.6710727214813232, |
|
"logps/chosen": -385.234375, |
|
"logps/rejected": -537.0969848632812, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.539281964302063, |
|
"rewards/margins": 1.319455862045288, |
|
"rewards/rejected": -2.8587379455566406, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 42.126294732393575, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": 0.7393206357955933, |
|
"logits/rejected": 0.7721438407897949, |
|
"logps/chosen": -342.84613037109375, |
|
"logps/rejected": -438.0054626464844, |
|
"loss": 0.4718, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2897088527679443, |
|
"rewards/margins": 1.3247145414352417, |
|
"rewards/rejected": -2.6144232749938965, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 45.48215970360758, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": 0.6715846061706543, |
|
"logits/rejected": 0.7098231911659241, |
|
"logps/chosen": -445.4051818847656, |
|
"logps/rejected": -614.8271484375, |
|
"loss": 0.4511, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5650415420532227, |
|
"rewards/margins": 2.225489377975464, |
|
"rewards/rejected": -3.7905311584472656, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 33.58800202447882, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": 0.6484588980674744, |
|
"logits/rejected": 0.6462217569351196, |
|
"logps/chosen": -418.74310302734375, |
|
"logps/rejected": -575.1314086914062, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5988870859146118, |
|
"rewards/margins": 1.3143718242645264, |
|
"rewards/rejected": -2.9132585525512695, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 35.56501086040675, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": 0.6811990141868591, |
|
"logits/rejected": 0.6851666569709778, |
|
"logps/chosen": -363.73486328125, |
|
"logps/rejected": -558.9583740234375, |
|
"loss": 0.464, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2833194732666016, |
|
"rewards/margins": 1.7774661779403687, |
|
"rewards/rejected": -3.0607855319976807, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 48.885323582555905, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": 0.6057692766189575, |
|
"logits/rejected": 0.6347171664237976, |
|
"logps/chosen": -456.45050048828125, |
|
"logps/rejected": -491.285888671875, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7665584087371826, |
|
"rewards/margins": 0.7946644425392151, |
|
"rewards/rejected": -2.561223268508911, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 52.026359398966434, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": 0.68748939037323, |
|
"logits/rejected": 0.7001315355300903, |
|
"logps/chosen": -417.6197204589844, |
|
"logps/rejected": -501.4674377441406, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6828155517578125, |
|
"rewards/margins": 0.8300768733024597, |
|
"rewards/rejected": -2.512892246246338, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 40.657536973128096, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": 0.7110599279403687, |
|
"logits/rejected": 0.7387591600418091, |
|
"logps/chosen": -483.6581115722656, |
|
"logps/rejected": -655.469970703125, |
|
"loss": 0.509, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9261047840118408, |
|
"rewards/margins": 1.5848186016082764, |
|
"rewards/rejected": -3.510922908782959, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 43.24860595798503, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": 0.6262224316596985, |
|
"logits/rejected": 0.6679535508155823, |
|
"logps/chosen": -383.0298156738281, |
|
"logps/rejected": -520.960205078125, |
|
"loss": 0.5095, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4945952892303467, |
|
"rewards/margins": 1.4201505184173584, |
|
"rewards/rejected": -2.914746046066284, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 30.03259002192758, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": 0.7243183851242065, |
|
"logits/rejected": 0.7136281132698059, |
|
"logps/chosen": -464.9974060058594, |
|
"logps/rejected": -572.7579956054688, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7093970775604248, |
|
"rewards/margins": 1.3652998208999634, |
|
"rewards/rejected": -3.0746970176696777, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 31.840341115692503, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": 0.5662111639976501, |
|
"logits/rejected": 0.5857071876525879, |
|
"logps/chosen": -462.5718688964844, |
|
"logps/rejected": -586.9605712890625, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7450441122055054, |
|
"rewards/margins": 1.6221263408660889, |
|
"rewards/rejected": -3.3671703338623047, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 33.21694320716617, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": 0.6892956495285034, |
|
"logits/rejected": 0.7154291868209839, |
|
"logps/chosen": -404.4335021972656, |
|
"logps/rejected": -538.8549194335938, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5369670391082764, |
|
"rewards/margins": 1.303447961807251, |
|
"rewards/rejected": -2.8404150009155273, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 51.74258645160025, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": 0.6136958003044128, |
|
"logits/rejected": 0.637305498123169, |
|
"logps/chosen": -522.9058837890625, |
|
"logps/rejected": -606.9166259765625, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7736104726791382, |
|
"rewards/margins": 1.2294580936431885, |
|
"rewards/rejected": -3.003068447113037, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 31.939433243371568, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": 0.6770081520080566, |
|
"logits/rejected": 0.6946592926979065, |
|
"logps/chosen": -465.00634765625, |
|
"logps/rejected": -675.024169921875, |
|
"loss": 0.4626, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8869049549102783, |
|
"rewards/margins": 1.8580411672592163, |
|
"rewards/rejected": -3.744946002960205, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 32.427853119545475, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": 0.5860388875007629, |
|
"logits/rejected": 0.6506261825561523, |
|
"logps/chosen": -435.30615234375, |
|
"logps/rejected": -577.177490234375, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5946733951568604, |
|
"rewards/margins": 1.3281992673873901, |
|
"rewards/rejected": -2.922873020172119, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 27.60653570560778, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": 0.6711539030075073, |
|
"logits/rejected": 0.7092609405517578, |
|
"logps/chosen": -433.2408142089844, |
|
"logps/rejected": -622.9896240234375, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.802742600440979, |
|
"rewards/margins": 2.034799814224243, |
|
"rewards/rejected": -3.8375420570373535, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 36.488044765002755, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": 0.7667958736419678, |
|
"logits/rejected": 0.7162773013114929, |
|
"logps/chosen": -481.6435546875, |
|
"logps/rejected": -564.5675659179688, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5850133895874023, |
|
"rewards/margins": 1.4455559253692627, |
|
"rewards/rejected": -3.030569553375244, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 46.89570443675896, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": 0.6919952630996704, |
|
"logits/rejected": 0.6967671513557434, |
|
"logps/chosen": -491.3282165527344, |
|
"logps/rejected": -630.562255859375, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4634296894073486, |
|
"rewards/margins": 1.5717954635620117, |
|
"rewards/rejected": -3.0352253913879395, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 38.92483757033139, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": 0.6587939858436584, |
|
"logits/rejected": 0.6271030902862549, |
|
"logps/chosen": -507.9803161621094, |
|
"logps/rejected": -711.3810424804688, |
|
"loss": 0.5085, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9189660549163818, |
|
"rewards/margins": 2.0694832801818848, |
|
"rewards/rejected": -3.988449811935425, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 39.99005445096094, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": 0.6913423538208008, |
|
"logits/rejected": 0.7273651957511902, |
|
"logps/chosen": -432.20458984375, |
|
"logps/rejected": -463.5044860839844, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4659842252731323, |
|
"rewards/margins": 0.7735913991928101, |
|
"rewards/rejected": -2.2395756244659424, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 42.80930835029662, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": 0.6128347516059875, |
|
"logits/rejected": 0.7182141542434692, |
|
"logps/chosen": -448.8204040527344, |
|
"logps/rejected": -642.3395385742188, |
|
"loss": 0.4698, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.679520606994629, |
|
"rewards/margins": 1.9481945037841797, |
|
"rewards/rejected": -3.6277146339416504, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 29.4673000753317, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": 0.5145514011383057, |
|
"logits/rejected": 0.604987621307373, |
|
"logps/chosen": -407.84942626953125, |
|
"logps/rejected": -521.2293701171875, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.442375898361206, |
|
"rewards/margins": 1.062880277633667, |
|
"rewards/rejected": -2.505256175994873, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 47.79956261456909, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": 0.7111871242523193, |
|
"logits/rejected": 0.7023571729660034, |
|
"logps/chosen": -472.0362243652344, |
|
"logps/rejected": -679.4020385742188, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7883527278900146, |
|
"rewards/margins": 1.815100073814392, |
|
"rewards/rejected": -3.6034531593322754, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 35.38700307758538, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": 0.5503877997398376, |
|
"logits/rejected": 0.5560935735702515, |
|
"logps/chosen": -431.03704833984375, |
|
"logps/rejected": -531.4757690429688, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6167274713516235, |
|
"rewards/margins": 0.931064248085022, |
|
"rewards/rejected": -2.5477917194366455, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 34.31827386168351, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": 0.6740199327468872, |
|
"logits/rejected": 0.696508526802063, |
|
"logps/chosen": -416.02911376953125, |
|
"logps/rejected": -684.9498901367188, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8001444339752197, |
|
"rewards/margins": 2.1946005821228027, |
|
"rewards/rejected": -3.9947447776794434, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 31.376350645526617, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": 0.6382318735122681, |
|
"logits/rejected": 0.6174970865249634, |
|
"logps/chosen": -444.5672302246094, |
|
"logps/rejected": -539.0169677734375, |
|
"loss": 0.485, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6137769222259521, |
|
"rewards/margins": 0.47504907846450806, |
|
"rewards/rejected": -2.0888259410858154, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 30.055393249663297, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": 0.582071840763092, |
|
"logits/rejected": 0.585811972618103, |
|
"logps/chosen": -448.01507568359375, |
|
"logps/rejected": -605.9859008789062, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6493518352508545, |
|
"rewards/margins": 2.0434372425079346, |
|
"rewards/rejected": -3.692789077758789, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 37.008005737101136, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": 0.5251346230506897, |
|
"logits/rejected": 0.5287281274795532, |
|
"logps/chosen": -482.1429138183594, |
|
"logps/rejected": -692.1409301757812, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7261474132537842, |
|
"rewards/margins": 2.155860424041748, |
|
"rewards/rejected": -3.8820080757141113, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 42.882926576601946, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": 0.5571666359901428, |
|
"logits/rejected": 0.5792709589004517, |
|
"logps/chosen": -371.2899475097656, |
|
"logps/rejected": -534.5432739257812, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3483814001083374, |
|
"rewards/margins": 1.4740469455718994, |
|
"rewards/rejected": -2.8224284648895264, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 31.83370209912519, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": 0.7409833073616028, |
|
"logits/rejected": 0.7550204396247864, |
|
"logps/chosen": -449.55535888671875, |
|
"logps/rejected": -595.6565551757812, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.6462414264678955, |
|
"rewards/margins": 1.765639066696167, |
|
"rewards/rejected": -3.4118804931640625, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 29.90782487751284, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": 0.6009663343429565, |
|
"logits/rejected": 0.6457256078720093, |
|
"logps/chosen": -460.5867614746094, |
|
"logps/rejected": -528.3399047851562, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7170873880386353, |
|
"rewards/margins": 0.7792832851409912, |
|
"rewards/rejected": -2.496370792388916, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 30.797799497811695, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": 0.7179510593414307, |
|
"logits/rejected": 0.7142021059989929, |
|
"logps/chosen": -480.66156005859375, |
|
"logps/rejected": -584.0746459960938, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6999647617340088, |
|
"rewards/margins": 1.0811774730682373, |
|
"rewards/rejected": -2.781142234802246, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 32.61397088226819, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": 0.5735732316970825, |
|
"logits/rejected": 0.624186635017395, |
|
"logps/chosen": -483.68231201171875, |
|
"logps/rejected": -595.20166015625, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.586411714553833, |
|
"rewards/margins": 1.3250799179077148, |
|
"rewards/rejected": -2.911491870880127, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 31.88712634446775, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": 0.695099949836731, |
|
"logits/rejected": 0.7135946154594421, |
|
"logps/chosen": -465.44671630859375, |
|
"logps/rejected": -647.7462768554688, |
|
"loss": 0.4531, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9738279581069946, |
|
"rewards/margins": 1.9751651287078857, |
|
"rewards/rejected": -3.94899320602417, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 26.750814685545, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": 0.5124061703681946, |
|
"logits/rejected": 0.5727663040161133, |
|
"logps/chosen": -414.16766357421875, |
|
"logps/rejected": -561.9720458984375, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8436729907989502, |
|
"rewards/margins": 0.9941961169242859, |
|
"rewards/rejected": -2.837869167327881, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 33.88554208031626, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": 0.6761840581893921, |
|
"logits/rejected": 0.7198007702827454, |
|
"logps/chosen": -418.03204345703125, |
|
"logps/rejected": -517.2802734375, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9059219360351562, |
|
"rewards/margins": 1.1033378839492798, |
|
"rewards/rejected": -3.0092597007751465, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 35.312495401161826, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": 0.60147625207901, |
|
"logits/rejected": 0.6507999300956726, |
|
"logps/chosen": -451.183349609375, |
|
"logps/rejected": -656.1090087890625, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.024777889251709, |
|
"rewards/margins": 1.9228811264038086, |
|
"rewards/rejected": -3.9476592540740967, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 40.30857026059499, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": 0.6580095887184143, |
|
"logits/rejected": 0.61357581615448, |
|
"logps/chosen": -445.29559326171875, |
|
"logps/rejected": -532.5142211914062, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4763895273208618, |
|
"rewards/margins": 0.9797104001045227, |
|
"rewards/rejected": -2.4560999870300293, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 29.89336340827424, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": 0.6960023641586304, |
|
"logits/rejected": 0.6264194250106812, |
|
"logps/chosen": -556.0472412109375, |
|
"logps/rejected": -656.1188354492188, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.868370771408081, |
|
"rewards/margins": 1.122097134590149, |
|
"rewards/rejected": -2.9904677867889404, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"eval_logits/chosen": 0.7145485281944275, |
|
"eval_logits/rejected": 0.7296622395515442, |
|
"eval_logps/chosen": -449.66363525390625, |
|
"eval_logps/rejected": -602.4407958984375, |
|
"eval_loss": 0.48357322812080383, |
|
"eval_rewards/accuracies": 0.7892857193946838, |
|
"eval_rewards/chosen": -1.707390308380127, |
|
"eval_rewards/margins": 1.523337721824646, |
|
"eval_rewards/rejected": -3.2307281494140625, |
|
"eval_runtime": 86.6873, |
|
"eval_samples_per_second": 51.461, |
|
"eval_steps_per_second": 0.808, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 58.07953756761719, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": 0.6966633200645447, |
|
"logits/rejected": 0.7185578942298889, |
|
"logps/chosen": -392.63397216796875, |
|
"logps/rejected": -583.4874267578125, |
|
"loss": 0.4796, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.5413339138031006, |
|
"rewards/margins": 1.9730831384658813, |
|
"rewards/rejected": -3.5144171714782715, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 46.10404234203207, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": 0.6493710279464722, |
|
"logits/rejected": 0.6657570004463196, |
|
"logps/chosen": -575.0656127929688, |
|
"logps/rejected": -729.4915771484375, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0319643020629883, |
|
"rewards/margins": 1.6315898895263672, |
|
"rewards/rejected": -3.6635544300079346, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 39.22948436791097, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": 0.5851081013679504, |
|
"logits/rejected": 0.5799533128738403, |
|
"logps/chosen": -480.31463623046875, |
|
"logps/rejected": -570.2797241210938, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7180426120758057, |
|
"rewards/margins": 0.8774716258049011, |
|
"rewards/rejected": -2.5955138206481934, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 50.62146798404109, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": 0.5723304748535156, |
|
"logits/rejected": 0.6564613580703735, |
|
"logps/chosen": -469.9656677246094, |
|
"logps/rejected": -674.6346435546875, |
|
"loss": 0.5392, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.904486894607544, |
|
"rewards/margins": 2.1075260639190674, |
|
"rewards/rejected": -4.012012481689453, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 49.07423025714521, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": 0.6182990074157715, |
|
"logits/rejected": 0.6073701977729797, |
|
"logps/chosen": -455.4971618652344, |
|
"logps/rejected": -613.2784423828125, |
|
"loss": 0.487, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.631026029586792, |
|
"rewards/margins": 1.453333854675293, |
|
"rewards/rejected": -3.084359645843506, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 27.42422616328582, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": 0.58095383644104, |
|
"logits/rejected": 0.6131027340888977, |
|
"logps/chosen": -382.36871337890625, |
|
"logps/rejected": -575.1259155273438, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5139602422714233, |
|
"rewards/margins": 1.8817888498306274, |
|
"rewards/rejected": -3.3957488536834717, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 43.846222208955815, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": 0.7621703147888184, |
|
"logits/rejected": 0.7751132845878601, |
|
"logps/chosen": -456.3731994628906, |
|
"logps/rejected": -598.9076538085938, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.952593207359314, |
|
"rewards/margins": 1.5192127227783203, |
|
"rewards/rejected": -3.471806049346924, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 40.952265372902325, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": 0.6373910307884216, |
|
"logits/rejected": 0.6397606730461121, |
|
"logps/chosen": -465.97174072265625, |
|
"logps/rejected": -629.4970092773438, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8233009576797485, |
|
"rewards/margins": 1.4212223291397095, |
|
"rewards/rejected": -3.2445228099823, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 47.62972519366363, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": 0.5775616765022278, |
|
"logits/rejected": 0.603803277015686, |
|
"logps/chosen": -378.0145568847656, |
|
"logps/rejected": -548.2694702148438, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5502240657806396, |
|
"rewards/margins": 1.536120057106018, |
|
"rewards/rejected": -3.086343765258789, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 43.81658284237901, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": 0.6224797368049622, |
|
"logits/rejected": 0.6658841371536255, |
|
"logps/chosen": -455.75555419921875, |
|
"logps/rejected": -602.9154663085938, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0334808826446533, |
|
"rewards/margins": 1.5407496690750122, |
|
"rewards/rejected": -3.574230670928955, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 26.2080571697754, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": 0.5944604873657227, |
|
"logits/rejected": 0.6213740110397339, |
|
"logps/chosen": -458.01312255859375, |
|
"logps/rejected": -585.683349609375, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6532799005508423, |
|
"rewards/margins": 0.9707270860671997, |
|
"rewards/rejected": -2.624006748199463, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 55.389028996175746, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": 0.6714688539505005, |
|
"logits/rejected": 0.5972020030021667, |
|
"logps/chosen": -501.0025329589844, |
|
"logps/rejected": -521.9111938476562, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.590149998664856, |
|
"rewards/margins": 0.6970120072364807, |
|
"rewards/rejected": -2.2871620655059814, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 32.72649040496507, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": 0.5367740392684937, |
|
"logits/rejected": 0.5260412693023682, |
|
"logps/chosen": -512.4938354492188, |
|
"logps/rejected": -691.9700927734375, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8918201923370361, |
|
"rewards/margins": 2.05039119720459, |
|
"rewards/rejected": -3.942211627960205, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 41.73724251859538, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": 0.7122509479522705, |
|
"logits/rejected": 0.6869832277297974, |
|
"logps/chosen": -482.57952880859375, |
|
"logps/rejected": -656.2966918945312, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6301801204681396, |
|
"rewards/margins": 2.3234925270080566, |
|
"rewards/rejected": -3.953672409057617, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 40.29970530677951, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": 0.6537809371948242, |
|
"logits/rejected": 0.6641941070556641, |
|
"logps/chosen": -462.24432373046875, |
|
"logps/rejected": -581.655029296875, |
|
"loss": 0.5032, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6611082553863525, |
|
"rewards/margins": 1.1628700494766235, |
|
"rewards/rejected": -2.8239779472351074, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 42.46853911862954, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": 0.6924387216567993, |
|
"logits/rejected": 0.6893667578697205, |
|
"logps/chosen": -396.61761474609375, |
|
"logps/rejected": -547.9915771484375, |
|
"loss": 0.5326, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5040003061294556, |
|
"rewards/margins": 1.3634313344955444, |
|
"rewards/rejected": -2.867431640625, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5339851984021302, |
|
"train_runtime": 6933.6865, |
|
"train_samples_per_second": 19.235, |
|
"train_steps_per_second": 0.601 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|