|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 436, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022935779816513763, |
|
"grad_norm": 5.356178331285126, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.6583542823791504, |
|
"logits/rejected": -2.612396240234375, |
|
"logps/chosen": -310.2690124511719, |
|
"logps/rejected": -241.6248321533203, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -4.61353047285229e-05, |
|
"rewards/margins": -0.00015705036639701575, |
|
"rewards/rejected": 0.00011091506894445047, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045871559633027525, |
|
"grad_norm": 6.4233925318831595, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.691195011138916, |
|
"logits/rejected": -2.6153342723846436, |
|
"logps/chosen": -293.5455627441406, |
|
"logps/rejected": -265.6838684082031, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.001484546228311956, |
|
"rewards/margins": 0.002768759150058031, |
|
"rewards/rejected": -0.0012842128053307533, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06880733944954129, |
|
"grad_norm": 5.149124678509347, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.6977083683013916, |
|
"logits/rejected": -2.63045072555542, |
|
"logps/chosen": -277.82159423828125, |
|
"logps/rejected": -297.18646240234375, |
|
"loss": 0.6892, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.004203228745609522, |
|
"rewards/margins": 0.009881972335278988, |
|
"rewards/rejected": -0.005678744055330753, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09174311926605505, |
|
"grad_norm": 6.002207032235101, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.616579294204712, |
|
"logits/rejected": -2.5455870628356934, |
|
"logps/chosen": -283.92156982421875, |
|
"logps/rejected": -259.82562255859375, |
|
"loss": 0.6798, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.036965593695640564, |
|
"rewards/margins": 0.04610789567232132, |
|
"rewards/rejected": -0.009142300114035606, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"grad_norm": 5.926817590245787, |
|
"learning_rate": 4.997110275491701e-07, |
|
"logits/chosen": -2.596590518951416, |
|
"logits/rejected": -2.512640953063965, |
|
"logps/chosen": -285.3323669433594, |
|
"logps/rejected": -247.4479522705078, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.006985962390899658, |
|
"rewards/margins": 0.058415599167346954, |
|
"rewards/rejected": -0.06540156155824661, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"eval_logits/chosen": -2.607215166091919, |
|
"eval_logits/rejected": -2.5074896812438965, |
|
"eval_logps/chosen": -286.6437683105469, |
|
"eval_logps/rejected": -258.6246032714844, |
|
"eval_loss": 0.6559526920318604, |
|
"eval_rewards/accuracies": 0.6724137663841248, |
|
"eval_rewards/chosen": -0.026378028094768524, |
|
"eval_rewards/margins": 0.10339301824569702, |
|
"eval_rewards/rejected": -0.12977103888988495, |
|
"eval_runtime": 92.1507, |
|
"eval_samples_per_second": 19.729, |
|
"eval_steps_per_second": 0.315, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13761467889908258, |
|
"grad_norm": 7.494952728753531, |
|
"learning_rate": 4.979475034558115e-07, |
|
"logits/chosen": -2.582334518432617, |
|
"logits/rejected": -2.508467197418213, |
|
"logps/chosen": -292.1842346191406, |
|
"logps/rejected": -282.423583984375, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.11595962941646576, |
|
"rewards/margins": 0.1907343566417694, |
|
"rewards/rejected": -0.306693971157074, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16055045871559634, |
|
"grad_norm": 18.148816686471342, |
|
"learning_rate": 4.945923025551788e-07, |
|
"logits/chosen": -2.459238052368164, |
|
"logits/rejected": -2.3897058963775635, |
|
"logps/chosen": -298.2831115722656, |
|
"logps/rejected": -273.2386474609375, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.29933103919029236, |
|
"rewards/margins": 0.23945657908916473, |
|
"rewards/rejected": -0.5387876629829407, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 12.734144337443169, |
|
"learning_rate": 4.896669632591651e-07, |
|
"logits/chosen": -2.5085086822509766, |
|
"logits/rejected": -2.3976407051086426, |
|
"logps/chosen": -305.76031494140625, |
|
"logps/rejected": -321.8554992675781, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.3573322296142578, |
|
"rewards/margins": 0.28428393602371216, |
|
"rewards/rejected": -0.6416162252426147, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20642201834862386, |
|
"grad_norm": 14.039079346644037, |
|
"learning_rate": 4.832031033425662e-07, |
|
"logits/chosen": -1.4997788667678833, |
|
"logits/rejected": -1.313194990158081, |
|
"logps/chosen": -348.44805908203125, |
|
"logps/rejected": -361.76226806640625, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.44265589118003845, |
|
"rewards/margins": 0.4234777092933655, |
|
"rewards/rejected": -0.8661335706710815, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"grad_norm": 13.29279140070498, |
|
"learning_rate": 4.752422169756047e-07, |
|
"logits/chosen": -0.19194559752941132, |
|
"logits/rejected": 0.2622618079185486, |
|
"logps/chosen": -339.16339111328125, |
|
"logps/rejected": -359.37176513671875, |
|
"loss": 0.581, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.598974347114563, |
|
"rewards/margins": 0.4787676930427551, |
|
"rewards/rejected": -1.0777419805526733, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"eval_logits/chosen": 0.026995467022061348, |
|
"eval_logits/rejected": 0.6340460777282715, |
|
"eval_logps/chosen": -357.115966796875, |
|
"eval_logps/rejected": -377.3665771484375, |
|
"eval_loss": 0.5763944387435913, |
|
"eval_rewards/accuracies": 0.7155172228813171, |
|
"eval_rewards/chosen": -0.7311002016067505, |
|
"eval_rewards/margins": 0.5860908627510071, |
|
"eval_rewards/rejected": -1.3171910047531128, |
|
"eval_runtime": 91.0093, |
|
"eval_samples_per_second": 19.976, |
|
"eval_steps_per_second": 0.319, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25229357798165136, |
|
"grad_norm": 27.36521925016087, |
|
"learning_rate": 4.658354083558188e-07, |
|
"logits/chosen": -0.14074298739433289, |
|
"logits/rejected": 0.41164666414260864, |
|
"logps/chosen": -359.0007019042969, |
|
"logps/rejected": -422.62353515625, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6176259517669678, |
|
"rewards/margins": 0.7909212708473206, |
|
"rewards/rejected": -1.4085471630096436, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27522935779816515, |
|
"grad_norm": 18.22825267425928, |
|
"learning_rate": 4.550430636492389e-07, |
|
"logits/chosen": 0.28136759996414185, |
|
"logits/rejected": 1.2520945072174072, |
|
"logps/chosen": -414.25665283203125, |
|
"logps/rejected": -428.6090393066406, |
|
"loss": 0.5788, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0384491682052612, |
|
"rewards/margins": 0.647238552570343, |
|
"rewards/rejected": -1.6856876611709595, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2981651376146789, |
|
"grad_norm": 18.72996488177851, |
|
"learning_rate": 4.429344633468004e-07, |
|
"logits/chosen": 1.1580041646957397, |
|
"logits/rejected": 1.9673328399658203, |
|
"logps/chosen": -384.8316650390625, |
|
"logps/rejected": -440.20672607421875, |
|
"loss": 0.5744, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9717643857002258, |
|
"rewards/margins": 0.8623247146606445, |
|
"rewards/rejected": -1.8340890407562256, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3211009174311927, |
|
"grad_norm": 18.77533851044078, |
|
"learning_rate": 4.2958733752443187e-07, |
|
"logits/chosen": 0.9655276536941528, |
|
"logits/rejected": 1.986130952835083, |
|
"logps/chosen": -377.4757995605469, |
|
"logps/rejected": -408.6956481933594, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9340255856513977, |
|
"rewards/margins": 0.7136737704277039, |
|
"rewards/rejected": -1.6476993560791016, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"grad_norm": 22.441752676286086, |
|
"learning_rate": 4.150873668617898e-07, |
|
"logits/chosen": 1.651755928993225, |
|
"logits/rejected": 2.6961984634399414, |
|
"logps/chosen": -394.5315856933594, |
|
"logps/rejected": -437.6512756347656, |
|
"loss": 0.558, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0381582975387573, |
|
"rewards/margins": 0.7305435538291931, |
|
"rewards/rejected": -1.7687019109725952, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"eval_logits/chosen": 2.0827815532684326, |
|
"eval_logits/rejected": 3.0035645961761475, |
|
"eval_logps/chosen": -404.3199157714844, |
|
"eval_logps/rejected": -442.60711669921875, |
|
"eval_loss": 0.5509841442108154, |
|
"eval_rewards/accuracies": 0.7241379022598267, |
|
"eval_rewards/chosen": -1.203139305114746, |
|
"eval_rewards/margins": 0.7664569616317749, |
|
"eval_rewards/rejected": -1.9695963859558105, |
|
"eval_runtime": 90.3932, |
|
"eval_samples_per_second": 20.112, |
|
"eval_steps_per_second": 0.321, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 24.238500011603442, |
|
"learning_rate": 3.9952763262280397e-07, |
|
"logits/chosen": 1.6490274667739868, |
|
"logits/rejected": 2.5100581645965576, |
|
"logps/chosen": -409.46240234375, |
|
"logps/rejected": -448.33001708984375, |
|
"loss": 0.557, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1305733919143677, |
|
"rewards/margins": 0.8016298413276672, |
|
"rewards/rejected": -1.9322032928466797, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38990825688073394, |
|
"grad_norm": 29.076032215796957, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"logits/chosen": 1.5585577487945557, |
|
"logits/rejected": 2.380032777786255, |
|
"logps/chosen": -372.0144958496094, |
|
"logps/rejected": -400.96905517578125, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0608928203582764, |
|
"rewards/margins": 0.7344645261764526, |
|
"rewards/rejected": -1.795357346534729, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41284403669724773, |
|
"grad_norm": 23.777603972721764, |
|
"learning_rate": 3.6563457256020884e-07, |
|
"logits/chosen": 1.052141785621643, |
|
"logits/rejected": 1.8935604095458984, |
|
"logps/chosen": -356.8204650878906, |
|
"logps/rejected": -432.20001220703125, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9667918086051941, |
|
"rewards/margins": 0.9197394251823425, |
|
"rewards/rejected": -1.886531114578247, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43577981651376146, |
|
"grad_norm": 20.231853124698564, |
|
"learning_rate": 3.475188202022617e-07, |
|
"logits/chosen": 1.569053292274475, |
|
"logits/rejected": 2.5012192726135254, |
|
"logps/chosen": -349.7216491699219, |
|
"logps/rejected": -458.28955078125, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9105401039123535, |
|
"rewards/margins": 1.0454990863800049, |
|
"rewards/rejected": -1.9560391902923584, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 20.18742592623794, |
|
"learning_rate": 3.287770545059052e-07, |
|
"logits/chosen": 2.6468214988708496, |
|
"logits/rejected": 3.313246965408325, |
|
"logps/chosen": -413.1968688964844, |
|
"logps/rejected": -454.881591796875, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.473356008529663, |
|
"rewards/margins": 0.712754487991333, |
|
"rewards/rejected": -2.186110258102417, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"eval_logits/chosen": 1.7577229738235474, |
|
"eval_logits/rejected": 2.7758734226226807, |
|
"eval_logps/chosen": -400.7710876464844, |
|
"eval_logps/rejected": -449.201904296875, |
|
"eval_loss": 0.5381261706352234, |
|
"eval_rewards/accuracies": 0.7112069129943848, |
|
"eval_rewards/chosen": -1.1676514148712158, |
|
"eval_rewards/margins": 0.8678924441337585, |
|
"eval_rewards/rejected": -2.03554368019104, |
|
"eval_runtime": 90.283, |
|
"eval_samples_per_second": 20.137, |
|
"eval_steps_per_second": 0.321, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.481651376146789, |
|
"grad_norm": 21.096800994630236, |
|
"learning_rate": 3.0952958655864954e-07, |
|
"logits/chosen": 2.1683189868927, |
|
"logits/rejected": 2.6720829010009766, |
|
"logps/chosen": -401.7050476074219, |
|
"logps/rejected": -487.34161376953125, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2490909099578857, |
|
"rewards/margins": 0.7777953743934631, |
|
"rewards/rejected": -2.026886463165283, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5045871559633027, |
|
"grad_norm": 35.955511790614246, |
|
"learning_rate": 2.898999737583448e-07, |
|
"logits/chosen": 1.9502754211425781, |
|
"logits/rejected": 2.887373447418213, |
|
"logps/chosen": -407.0714111328125, |
|
"logps/rejected": -475.75860595703125, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.3871901035308838, |
|
"rewards/margins": 0.8300696611404419, |
|
"rewards/rejected": -2.2172598838806152, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5275229357798165, |
|
"grad_norm": 21.81682834473053, |
|
"learning_rate": 2.7001422664752333e-07, |
|
"logits/chosen": 2.0954604148864746, |
|
"logits/rejected": 3.134028673171997, |
|
"logps/chosen": -393.80865478515625, |
|
"logps/rejected": -481.6973571777344, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1964021921157837, |
|
"rewards/margins": 1.084702968597412, |
|
"rewards/rejected": -2.281104803085327, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 20.331534801215742, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 2.4693617820739746, |
|
"logits/rejected": 2.7029402256011963, |
|
"logps/chosen": -397.209716796875, |
|
"logps/rejected": -480.30621337890625, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.274371862411499, |
|
"rewards/margins": 0.8711179494857788, |
|
"rewards/rejected": -2.1454896926879883, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"grad_norm": 21.16814139127329, |
|
"learning_rate": 2.2998577335247667e-07, |
|
"logits/chosen": 2.334216356277466, |
|
"logits/rejected": 3.1122984886169434, |
|
"logps/chosen": -399.35968017578125, |
|
"logps/rejected": -462.42877197265625, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.344590425491333, |
|
"rewards/margins": 0.8345645070075989, |
|
"rewards/rejected": -2.179154872894287, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"eval_logits/chosen": 1.8166545629501343, |
|
"eval_logits/rejected": 2.9561386108398438, |
|
"eval_logps/chosen": -392.5903015136719, |
|
"eval_logps/rejected": -442.3040771484375, |
|
"eval_loss": 0.5333030819892883, |
|
"eval_rewards/accuracies": 0.7198275923728943, |
|
"eval_rewards/chosen": -1.0858436822891235, |
|
"eval_rewards/margins": 0.8807222843170166, |
|
"eval_rewards/rejected": -1.9665659666061401, |
|
"eval_runtime": 91.6089, |
|
"eval_samples_per_second": 19.845, |
|
"eval_steps_per_second": 0.317, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5963302752293578, |
|
"grad_norm": 24.05630881187602, |
|
"learning_rate": 2.1010002624165524e-07, |
|
"logits/chosen": 2.180393934249878, |
|
"logits/rejected": 3.2447829246520996, |
|
"logps/chosen": -416.7367248535156, |
|
"logps/rejected": -477.38671875, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.1423505544662476, |
|
"rewards/margins": 1.0397279262542725, |
|
"rewards/rejected": -2.1820783615112305, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6192660550458715, |
|
"grad_norm": 16.426211814362816, |
|
"learning_rate": 1.9047041344135043e-07, |
|
"logits/chosen": 2.4754998683929443, |
|
"logits/rejected": 3.3202342987060547, |
|
"logps/chosen": -418.9905700683594, |
|
"logps/rejected": -466.9713439941406, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.4273664951324463, |
|
"rewards/margins": 0.7679312229156494, |
|
"rewards/rejected": -2.1952977180480957, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6422018348623854, |
|
"grad_norm": 25.36799111369545, |
|
"learning_rate": 1.7122294549409482e-07, |
|
"logits/chosen": 2.9461216926574707, |
|
"logits/rejected": 3.8612606525421143, |
|
"logps/chosen": -443.60198974609375, |
|
"logps/rejected": -535.1948852539062, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.662767767906189, |
|
"rewards/margins": 0.998543918132782, |
|
"rewards/rejected": -2.6613118648529053, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6651376146788991, |
|
"grad_norm": 15.931208067906516, |
|
"learning_rate": 1.524811797977383e-07, |
|
"logits/chosen": 2.2281603813171387, |
|
"logits/rejected": 3.0743608474731445, |
|
"logps/chosen": -415.99908447265625, |
|
"logps/rejected": -480.72003173828125, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2590678930282593, |
|
"rewards/margins": 0.8066269159317017, |
|
"rewards/rejected": -2.065694808959961, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"grad_norm": 18.614598999130695, |
|
"learning_rate": 1.3436542743979125e-07, |
|
"logits/chosen": 2.0644378662109375, |
|
"logits/rejected": 3.2977874279022217, |
|
"logps/chosen": -393.56756591796875, |
|
"logps/rejected": -459.68646240234375, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0834629535675049, |
|
"rewards/margins": 1.0138219594955444, |
|
"rewards/rejected": -2.097285032272339, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"eval_logits/chosen": 2.0026185512542725, |
|
"eval_logits/rejected": 3.223935604095459, |
|
"eval_logps/chosen": -388.63787841796875, |
|
"eval_logps/rejected": -442.7093200683594, |
|
"eval_loss": 0.5265418291091919, |
|
"eval_rewards/accuracies": 0.7068965435028076, |
|
"eval_rewards/chosen": -1.0463188886642456, |
|
"eval_rewards/margins": 0.9242996573448181, |
|
"eval_rewards/rejected": -1.970618486404419, |
|
"eval_runtime": 90.447, |
|
"eval_samples_per_second": 20.1, |
|
"eval_steps_per_second": 0.321, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7110091743119266, |
|
"grad_norm": 25.782071483124422, |
|
"learning_rate": 1.1699198087116588e-07, |
|
"logits/chosen": 2.8770992755889893, |
|
"logits/rejected": 3.6848435401916504, |
|
"logps/chosen": -387.76580810546875, |
|
"logps/rejected": -468.38275146484375, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.336073875427246, |
|
"rewards/margins": 0.9252589344978333, |
|
"rewards/rejected": -2.2613327503204346, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 23.531042495765035, |
|
"learning_rate": 1.00472367377196e-07, |
|
"logits/chosen": 2.587601900100708, |
|
"logits/rejected": 3.9543087482452393, |
|
"logps/chosen": -440.2958984375, |
|
"logps/rejected": -498.0613708496094, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3440136909484863, |
|
"rewards/margins": 1.1366775035858154, |
|
"rewards/rejected": -2.4806911945343018, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7568807339449541, |
|
"grad_norm": 22.178841978203927, |
|
"learning_rate": 8.49126331382102e-08, |
|
"logits/chosen": 2.5279412269592285, |
|
"logits/rejected": 3.4965198040008545, |
|
"logps/chosen": -422.66168212890625, |
|
"logps/rejected": -501.438720703125, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4393374919891357, |
|
"rewards/margins": 0.8559640645980835, |
|
"rewards/rejected": -2.295301914215088, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7798165137614679, |
|
"grad_norm": 19.61314237963683, |
|
"learning_rate": 7.041266247556812e-08, |
|
"logits/chosen": 2.785928726196289, |
|
"logits/rejected": 3.915510892868042, |
|
"logps/chosen": -388.799072265625, |
|
"logps/rejected": -494.65606689453125, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.265873670578003, |
|
"rewards/margins": 1.0917268991470337, |
|
"rewards/rejected": -2.357600450515747, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"grad_norm": 22.588827480706584, |
|
"learning_rate": 5.706553665319955e-08, |
|
"logits/chosen": 2.3770060539245605, |
|
"logits/rejected": 4.068874835968018, |
|
"logps/chosen": -419.5255432128906, |
|
"logps/rejected": -510.02911376953125, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2834579944610596, |
|
"rewards/margins": 1.3700745105743408, |
|
"rewards/rejected": -2.6535322666168213, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"eval_logits/chosen": 2.348414182662964, |
|
"eval_logits/rejected": 3.6065878868103027, |
|
"eval_logps/chosen": -417.5965881347656, |
|
"eval_logps/rejected": -477.5577392578125, |
|
"eval_loss": 0.5262271761894226, |
|
"eval_rewards/accuracies": 0.7241379022598267, |
|
"eval_rewards/chosen": -1.3359062671661377, |
|
"eval_rewards/margins": 0.9831959009170532, |
|
"eval_rewards/rejected": -2.3191022872924805, |
|
"eval_runtime": 91.8801, |
|
"eval_samples_per_second": 19.787, |
|
"eval_steps_per_second": 0.316, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8256880733944955, |
|
"grad_norm": 22.898724036504742, |
|
"learning_rate": 4.4956936350761005e-08, |
|
"logits/chosen": 2.4756264686584473, |
|
"logits/rejected": 3.231902599334717, |
|
"logps/chosen": -419.9034118652344, |
|
"logps/rejected": -510.82781982421875, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.401601791381836, |
|
"rewards/margins": 1.0482218265533447, |
|
"rewards/rejected": -2.4498236179351807, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8486238532110092, |
|
"grad_norm": 21.290872916140614, |
|
"learning_rate": 3.416459164418123e-08, |
|
"logits/chosen": 1.8261902332305908, |
|
"logits/rejected": 3.2766151428222656, |
|
"logps/chosen": -459.34906005859375, |
|
"logps/rejected": -512.47314453125, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2751537561416626, |
|
"rewards/margins": 1.1164480447769165, |
|
"rewards/rejected": -2.391602039337158, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8715596330275229, |
|
"grad_norm": 20.41896976274452, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": 2.1876559257507324, |
|
"logits/rejected": 3.5514347553253174, |
|
"logps/chosen": -429.52801513671875, |
|
"logps/rejected": -452.6607360839844, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.362518548965454, |
|
"rewards/margins": 0.9127564430236816, |
|
"rewards/rejected": -2.2752749919891357, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8944954128440367, |
|
"grad_norm": 20.106111939027084, |
|
"learning_rate": 1.6796896657433805e-08, |
|
"logits/chosen": 1.5682854652404785, |
|
"logits/rejected": 3.198239803314209, |
|
"logps/chosen": -423.41143798828125, |
|
"logps/rejected": -513.44140625, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1543933153152466, |
|
"rewards/margins": 1.4496588706970215, |
|
"rewards/rejected": -2.6040520668029785, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 22.36268387575501, |
|
"learning_rate": 1.0333036740834855e-08, |
|
"logits/chosen": 2.2944397926330566, |
|
"logits/rejected": 3.2362308502197266, |
|
"logps/chosen": -427.0224609375, |
|
"logps/rejected": -509.18438720703125, |
|
"loss": 0.5267, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.3516565561294556, |
|
"rewards/margins": 0.9079391360282898, |
|
"rewards/rejected": -2.2595956325531006, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"eval_logits/chosen": 1.9855237007141113, |
|
"eval_logits/rejected": 3.3069264888763428, |
|
"eval_logps/chosen": -402.9078674316406, |
|
"eval_logps/rejected": -463.85418701171875, |
|
"eval_loss": 0.5237594246864319, |
|
"eval_rewards/accuracies": 0.7241379022598267, |
|
"eval_rewards/chosen": -1.189018964767456, |
|
"eval_rewards/margins": 0.9930478930473328, |
|
"eval_rewards/rejected": -2.1820664405822754, |
|
"eval_runtime": 90.561, |
|
"eval_samples_per_second": 20.075, |
|
"eval_steps_per_second": 0.32, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9403669724770642, |
|
"grad_norm": 20.20141424383877, |
|
"learning_rate": 5.4076974448211685e-09, |
|
"logits/chosen": 2.3932690620422363, |
|
"logits/rejected": 3.2205722332000732, |
|
"logps/chosen": -426.5123596191406, |
|
"logps/rejected": -476.37139892578125, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.391105055809021, |
|
"rewards/margins": 0.8132905960083008, |
|
"rewards/rejected": -2.2043957710266113, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.963302752293578, |
|
"grad_norm": 20.629666257184397, |
|
"learning_rate": 2.052496544188487e-09, |
|
"logits/chosen": 2.141890048980713, |
|
"logits/rejected": 3.76823091506958, |
|
"logps/chosen": -436.96722412109375, |
|
"logps/rejected": -471.711181640625, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.3215954303741455, |
|
"rewards/margins": 1.0597209930419922, |
|
"rewards/rejected": -2.381316661834717, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9862385321100917, |
|
"grad_norm": 17.42236283649955, |
|
"learning_rate": 2.889724508297886e-10, |
|
"logits/chosen": 2.458095073699951, |
|
"logits/rejected": 3.361394166946411, |
|
"logps/chosen": -389.62994384765625, |
|
"logps/rejected": -474.5247497558594, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2771459817886353, |
|
"rewards/margins": 0.9393760561943054, |
|
"rewards/rejected": -2.216521739959717, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 436, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5659637576943144, |
|
"train_runtime": 11398.0027, |
|
"train_samples_per_second": 4.892, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 436, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|