|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 500, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.578385353088379, |
|
"logits/rejected": -2.53226900100708, |
|
"logps/chosen": -286.13739013671875, |
|
"logps/rejected": -212.73016357421875, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.468435525894165, |
|
"logits/rejected": -2.5060648918151855, |
|
"logps/chosen": -258.7095947265625, |
|
"logps/rejected": -233.5037384033203, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.3819444477558136, |
|
"rewards/chosen": -7.664680015295744e-05, |
|
"rewards/margins": 6.3225775193131994e-06, |
|
"rewards/rejected": -8.296939631691203e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -2.346015214920044, |
|
"logits/rejected": -2.4067437648773193, |
|
"logps/chosen": -196.97122192382812, |
|
"logps/rejected": -193.7008056640625, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": 6.571458652615547e-05, |
|
"rewards/margins": -3.4166391742473934e-06, |
|
"rewards/rejected": 6.913123070262372e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -2.4858970642089844, |
|
"logits/rejected": -2.451706886291504, |
|
"logps/chosen": -236.32901000976562, |
|
"logps/rejected": -208.12997436523438, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0012510574888437986, |
|
"rewards/margins": 0.00014562405704054981, |
|
"rewards/rejected": 0.0011054335627704859, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -2.4558634757995605, |
|
"logits/rejected": -2.477804183959961, |
|
"logps/chosen": -242.181640625, |
|
"logps/rejected": -234.55661010742188, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0018630999838933349, |
|
"rewards/margins": 0.0003440978180151433, |
|
"rewards/rejected": 0.001519002253189683, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.604166666666667e-06, |
|
"logits/chosen": -2.489288806915283, |
|
"logits/rejected": -2.5008156299591064, |
|
"logps/chosen": -235.43099975585938, |
|
"logps/rejected": -222.29641723632812, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0019815764389932156, |
|
"rewards/margins": 0.0009142985800281167, |
|
"rewards/rejected": 0.0010672778589650989, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.450463056564331, |
|
"logits/rejected": -2.443624258041382, |
|
"logps/chosen": -256.23590087890625, |
|
"logps/rejected": -230.0203094482422, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004073253367096186, |
|
"rewards/margins": 0.001231834408827126, |
|
"rewards/rejected": 0.002841418841853738, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.6458333333333333e-06, |
|
"logits/chosen": -2.459900379180908, |
|
"logits/rejected": -2.4804420471191406, |
|
"logps/chosen": -251.94174194335938, |
|
"logps/rejected": -234.4181671142578, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.004830378107726574, |
|
"rewards/margins": 0.0021367089357227087, |
|
"rewards/rejected": 0.002693668706342578, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -2.4172418117523193, |
|
"logits/rejected": -2.3813605308532715, |
|
"logps/chosen": -235.5308074951172, |
|
"logps/rejected": -215.6710968017578, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.006930059753358364, |
|
"rewards/margins": 0.002805978525429964, |
|
"rewards/rejected": 0.004124081693589687, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -2.4015233516693115, |
|
"logits/rejected": -2.3940534591674805, |
|
"logps/chosen": -226.13137817382812, |
|
"logps/rejected": -213.4936981201172, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.00892677903175354, |
|
"rewards/margins": 0.004470665007829666, |
|
"rewards/rejected": 0.0044561149552464485, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9997324926814375e-06, |
|
"logits/chosen": -2.382424831390381, |
|
"logits/rejected": -2.3642446994781494, |
|
"logps/chosen": -204.63381958007812, |
|
"logps/rejected": -217.92910766601562, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.010236050002276897, |
|
"rewards/margins": 0.00466396939009428, |
|
"rewards/rejected": 0.005572080612182617, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.996723692767927e-06, |
|
"logits/chosen": -2.395820140838623, |
|
"logits/rejected": -2.407099485397339, |
|
"logps/chosen": -215.1260223388672, |
|
"logps/rejected": -210.58309936523438, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.010834300890564919, |
|
"rewards/margins": 0.006746213883161545, |
|
"rewards/rejected": 0.004088086076080799, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9903757462135984e-06, |
|
"logits/chosen": -2.361361503601074, |
|
"logits/rejected": -2.4017128944396973, |
|
"logps/chosen": -208.5069122314453, |
|
"logps/rejected": -193.96817016601562, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.009557174518704414, |
|
"rewards/margins": 0.00605000089854002, |
|
"rewards/rejected": 0.0035071733873337507, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.980697142834315e-06, |
|
"logits/chosen": -2.3745360374450684, |
|
"logits/rejected": -2.3689522743225098, |
|
"logps/chosen": -226.9114990234375, |
|
"logps/rejected": -210.3325653076172, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.010273845866322517, |
|
"rewards/margins": 0.005562370643019676, |
|
"rewards/rejected": 0.004711476154625416, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967700826904229e-06, |
|
"logits/chosen": -2.39690899848938, |
|
"logits/rejected": -2.4031527042388916, |
|
"logps/chosen": -207.76968383789062, |
|
"logps/rejected": -206.6008758544922, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.011523631401360035, |
|
"rewards/margins": 0.009559462778270245, |
|
"rewards/rejected": 0.0019641686230897903, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.951404179843963e-06, |
|
"logits/chosen": -2.2971677780151367, |
|
"logits/rejected": -2.3256301879882812, |
|
"logps/chosen": -223.7987823486328, |
|
"logps/rejected": -211.0409393310547, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.007091984152793884, |
|
"rewards/margins": 0.009901536628603935, |
|
"rewards/rejected": -0.002809552475810051, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.931828996974498e-06, |
|
"logits/chosen": -2.3667495250701904, |
|
"logits/rejected": -2.2750391960144043, |
|
"logps/chosen": -207.93814086914062, |
|
"logps/rejected": -217.41806030273438, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.00839292537420988, |
|
"rewards/margins": 0.015968123450875282, |
|
"rewards/rejected": -0.0075751966796815395, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.909001458367867e-06, |
|
"logits/chosen": -2.3504929542541504, |
|
"logits/rejected": -2.328986644744873, |
|
"logps/chosen": -262.8653259277344, |
|
"logps/rejected": -238.43017578125, |
|
"loss": 0.4971, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.005286640953272581, |
|
"rewards/margins": 0.01585621014237404, |
|
"rewards/rejected": -0.010569569654762745, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.882952093833628e-06, |
|
"logits/chosen": -2.2022526264190674, |
|
"logits/rejected": -2.163339138031006, |
|
"logps/chosen": -211.5063018798828, |
|
"logps/rejected": -248.37081909179688, |
|
"loss": 0.4967, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.00028603168902918696, |
|
"rewards/margins": 0.02056037448346615, |
|
"rewards/rejected": -0.020274341106414795, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.853715742087947e-06, |
|
"logits/chosen": -2.2854294776916504, |
|
"logits/rejected": -2.230767011642456, |
|
"logps/chosen": -295.3899230957031, |
|
"logps/rejected": -293.7907409667969, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.026039790362119675, |
|
"rewards/margins": 0.04283389076590538, |
|
"rewards/rejected": -0.06887368112802505, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.821331504159906e-06, |
|
"logits/chosen": -2.1649932861328125, |
|
"logits/rejected": -2.122584819793701, |
|
"logps/chosen": -251.9384307861328, |
|
"logps/rejected": -293.52923583984375, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.030307698994874954, |
|
"rewards/margins": 0.03713207319378853, |
|
"rewards/rejected": -0.06743976473808289, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.7858426910973435e-06, |
|
"logits/chosen": -2.0010428428649902, |
|
"logits/rejected": -1.9664274454116821, |
|
"logps/chosen": -352.85986328125, |
|
"logps/rejected": -415.43768310546875, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.11694659292697906, |
|
"rewards/margins": 0.0868750587105751, |
|
"rewards/rejected": -0.20382165908813477, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.747296766042161e-06, |
|
"logits/chosen": -1.5235364437103271, |
|
"logits/rejected": -1.551948070526123, |
|
"logps/chosen": -565.6112060546875, |
|
"logps/rejected": -687.0833129882812, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.3442026376724243, |
|
"rewards/margins": 0.13101640343666077, |
|
"rewards/rejected": -0.47521907091140747, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.705745280752586e-06, |
|
"logits/chosen": -1.5635735988616943, |
|
"logits/rejected": -1.5089839696884155, |
|
"logps/chosen": -867.3118286132812, |
|
"logps/rejected": -959.4519653320312, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.6232264041900635, |
|
"rewards/margins": 0.11986882984638214, |
|
"rewards/rejected": -0.7430952787399292, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.661243806657256e-06, |
|
"logits/chosen": -1.8420759439468384, |
|
"logits/rejected": -1.7493212223052979, |
|
"logps/chosen": -739.1602783203125, |
|
"logps/rejected": -1018.7802734375, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5061613321304321, |
|
"rewards/margins": 0.31740203499794006, |
|
"rewards/rejected": -0.8235633969306946, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.613851860533367e-06, |
|
"logits/chosen": -1.7359821796417236, |
|
"logits/rejected": -1.664820909500122, |
|
"logps/chosen": -867.6565551757812, |
|
"logps/rejected": -1373.392333984375, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6370395421981812, |
|
"rewards/margins": 0.5256737470626831, |
|
"rewards/rejected": -1.1627132892608643, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.563632824908252e-06, |
|
"logits/chosen": -1.8760831356048584, |
|
"logits/rejected": -1.84027898311615, |
|
"logps/chosen": -652.6282958984375, |
|
"logps/rejected": -1129.7745361328125, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.47100549936294556, |
|
"rewards/margins": 0.4714561402797699, |
|
"rewards/rejected": -0.9424616694450378, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.510653863290871e-06, |
|
"logits/chosen": -1.8029680252075195, |
|
"logits/rejected": -1.7324800491333008, |
|
"logps/chosen": -1249.8055419921875, |
|
"logps/rejected": -1807.9056396484375, |
|
"loss": 0.4742, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.0133898258209229, |
|
"rewards/margins": 0.5813573002815247, |
|
"rewards/rejected": -1.5947470664978027, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.454985830346574e-06, |
|
"logits/chosen": -1.9399795532226562, |
|
"logits/rejected": -1.8146251440048218, |
|
"logps/chosen": -902.8073120117188, |
|
"logps/rejected": -1436.665283203125, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6662728786468506, |
|
"rewards/margins": 0.5495506525039673, |
|
"rewards/rejected": -1.2158234119415283, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.396703177135262e-06, |
|
"logits/chosen": -1.8842103481292725, |
|
"logits/rejected": -1.7202155590057373, |
|
"logps/chosen": -1762.968994140625, |
|
"logps/rejected": -1725.986572265625, |
|
"loss": 0.4717, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -1.510768175125122, |
|
"rewards/margins": 0.030063262209296227, |
|
"rewards/rejected": -1.540831446647644, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.335883851539693e-06, |
|
"logits/chosen": -2.0325398445129395, |
|
"logits/rejected": -1.8330217599868774, |
|
"logps/chosen": -1008.1095581054688, |
|
"logps/rejected": -1700.26171875, |
|
"loss": 0.4671, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.7875405550003052, |
|
"rewards/margins": 0.731080174446106, |
|
"rewards/rejected": -1.5186206102371216, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.2726091940171055e-06, |
|
"logits/chosen": -1.969412088394165, |
|
"logits/rejected": -1.8437814712524414, |
|
"logps/chosen": -798.6140747070312, |
|
"logps/rejected": -1872.577392578125, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5798918604850769, |
|
"rewards/margins": 1.0649895668029785, |
|
"rewards/rejected": -1.6448814868927002, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.206963828813555e-06, |
|
"logits/chosen": -2.004281759262085, |
|
"logits/rejected": -1.8325812816619873, |
|
"logps/chosen": -1020.1710205078125, |
|
"logps/rejected": -2015.131591796875, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8064894676208496, |
|
"rewards/margins": 0.9903135299682617, |
|
"rewards/rejected": -1.7968031167984009, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.139035550786495e-06, |
|
"logits/chosen": -2.0901331901550293, |
|
"logits/rejected": -1.9698684215545654, |
|
"logps/chosen": -915.0389404296875, |
|
"logps/rejected": -1435.4764404296875, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6817190051078796, |
|
"rewards/margins": 0.5508195161819458, |
|
"rewards/rejected": -1.2325387001037598, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.068915207986931e-06, |
|
"logits/chosen": -1.9757936000823975, |
|
"logits/rejected": -1.897470474243164, |
|
"logps/chosen": -1229.423828125, |
|
"logps/rejected": -1853.947265625, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.007495641708374, |
|
"rewards/margins": 0.6511304974555969, |
|
"rewards/rejected": -1.6586261987686157, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.996696580158211e-06, |
|
"logits/chosen": -1.968062162399292, |
|
"logits/rejected": -1.8332984447479248, |
|
"logps/chosen": -1563.551025390625, |
|
"logps/rejected": -2647.005615234375, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3222496509552002, |
|
"rewards/margins": 1.0978131294250488, |
|
"rewards/rejected": -2.42006254196167, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.922476253313921e-06, |
|
"logits/chosen": -2.2060952186584473, |
|
"logits/rejected": -2.1268954277038574, |
|
"logps/chosen": -1001.3084106445312, |
|
"logps/rejected": -1724.416015625, |
|
"loss": 0.4584, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7821061611175537, |
|
"rewards/margins": 0.7285407781600952, |
|
"rewards/rejected": -1.5106468200683594, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.846353490562664e-06, |
|
"logits/chosen": -2.1300292015075684, |
|
"logits/rejected": -2.000924587249756, |
|
"logps/chosen": -1243.8800048828125, |
|
"logps/rejected": -2480.47021484375, |
|
"loss": 0.4514, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0223863124847412, |
|
"rewards/margins": 1.2320432662963867, |
|
"rewards/rejected": -2.254429578781128, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.768430099352445e-06, |
|
"logits/chosen": -2.221879243850708, |
|
"logits/rejected": -2.128418207168579, |
|
"logps/chosen": -875.8338623046875, |
|
"logps/rejected": -1593.1365966796875, |
|
"loss": 0.4588, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.6446818113327026, |
|
"rewards/margins": 0.7323214411735535, |
|
"rewards/rejected": -1.3770033121109009, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6888102953122307e-06, |
|
"logits/chosen": -2.0890746116638184, |
|
"logits/rejected": -1.9689449071884155, |
|
"logps/chosen": -1064.7686767578125, |
|
"logps/rejected": -1700.5394287109375, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.8423658609390259, |
|
"rewards/margins": 0.6700539588928223, |
|
"rewards/rejected": -1.5124199390411377, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.607600562872785e-06, |
|
"logits/chosen": -2.2589426040649414, |
|
"logits/rejected": -2.160431385040283, |
|
"logps/chosen": -959.44140625, |
|
"logps/rejected": -1434.484130859375, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.69480961561203, |
|
"rewards/margins": 0.5226942896842957, |
|
"rewards/rejected": -1.2175039052963257, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5249095128531863e-06, |
|
"logits/chosen": -2.2115917205810547, |
|
"logits/rejected": -2.127436399459839, |
|
"logps/chosen": -1019.97509765625, |
|
"logps/rejected": -1477.9644775390625, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7589401602745056, |
|
"rewards/margins": 0.49645981192588806, |
|
"rewards/rejected": -1.2553999423980713, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4408477372034743e-06, |
|
"logits/chosen": -1.9758100509643555, |
|
"logits/rejected": -1.8129494190216064, |
|
"logps/chosen": -1256.257568359375, |
|
"logps/rejected": -2412.14794921875, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.0521903038024902, |
|
"rewards/margins": 1.142812728881836, |
|
"rewards/rejected": -2.1950032711029053, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.355527661097728e-06, |
|
"logits/chosen": -2.1546552181243896, |
|
"logits/rejected": -2.0900943279266357, |
|
"logps/chosen": -718.0090942382812, |
|
"logps/rejected": -1487.015380859375, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.5029612183570862, |
|
"rewards/margins": 0.7773032188415527, |
|
"rewards/rejected": -1.2802644968032837, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.269063392575352e-06, |
|
"logits/chosen": -2.1480519771575928, |
|
"logits/rejected": -2.071498394012451, |
|
"logps/chosen": -1282.3341064453125, |
|
"logps/rejected": -2054.326171875, |
|
"loss": 0.4626, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.0578222274780273, |
|
"rewards/margins": 0.7977155447006226, |
|
"rewards/rejected": -1.855538010597229, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.181570569931697e-06, |
|
"logits/chosen": -1.928739309310913, |
|
"logits/rejected": -1.8378665447235107, |
|
"logps/chosen": -1254.6744384765625, |
|
"logps/rejected": -2223.1025390625, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.060748815536499, |
|
"rewards/margins": 0.9665641784667969, |
|
"rewards/rejected": -2.027312755584717, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.09316620706208e-06, |
|
"logits/chosen": -2.2325401306152344, |
|
"logits/rejected": -2.123627185821533, |
|
"logps/chosen": -882.6212768554688, |
|
"logps/rejected": -1591.8580322265625, |
|
"loss": 0.459, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6440384387969971, |
|
"rewards/margins": 0.7315788269042969, |
|
"rewards/rejected": -1.3756173849105835, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0039685369660785e-06, |
|
"logits/chosen": -2.02402663230896, |
|
"logits/rejected": -1.9017149209976196, |
|
"logps/chosen": -1743.1324462890625, |
|
"logps/rejected": -2763.71923828125, |
|
"loss": 0.454, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.5182123184204102, |
|
"rewards/margins": 1.0456020832061768, |
|
"rewards/rejected": -2.563814401626587, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.91409685362137e-06, |
|
"logits/chosen": -2.046326160430908, |
|
"logits/rejected": -1.985815405845642, |
|
"logps/chosen": -1439.734130859375, |
|
"logps/rejected": -2124.520263671875, |
|
"loss": 0.4659, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.2466154098510742, |
|
"rewards/margins": 0.6888442039489746, |
|
"rewards/rejected": -1.935459852218628, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8236713524386085e-06, |
|
"logits/chosen": -2.134103775024414, |
|
"logits/rejected": -2.0179543495178223, |
|
"logps/chosen": -1013.251953125, |
|
"logps/rejected": -2033.077392578125, |
|
"loss": 0.4508, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.815003514289856, |
|
"rewards/margins": 1.0394479036331177, |
|
"rewards/rejected": -1.8544514179229736, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7328129695107205e-06, |
|
"logits/chosen": -2.16344952583313, |
|
"logits/rejected": -1.9862359762191772, |
|
"logps/chosen": -1314.334228515625, |
|
"logps/rejected": -2926.594970703125, |
|
"loss": 0.458, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.0824334621429443, |
|
"rewards/margins": 1.6397478580474854, |
|
"rewards/rejected": -2.7221813201904297, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.641643219871597e-06, |
|
"logits/chosen": -2.2062978744506836, |
|
"logits/rejected": -2.057356357574463, |
|
"logps/chosen": -1038.0162353515625, |
|
"logps/rejected": -2059.95458984375, |
|
"loss": 0.4491, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8147695660591125, |
|
"rewards/margins": 1.0522905588150024, |
|
"rewards/rejected": -1.8670603036880493, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5502840349805074e-06, |
|
"logits/chosen": -2.2159011363983154, |
|
"logits/rejected": -2.0826644897460938, |
|
"logps/chosen": -936.4302978515625, |
|
"logps/rejected": -1992.089599609375, |
|
"loss": 0.4483, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.6953409910202026, |
|
"rewards/margins": 1.08708918094635, |
|
"rewards/rejected": -1.7824300527572632, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4588575996495797e-06, |
|
"logits/chosen": -2.2215633392333984, |
|
"logits/rejected": -2.053880214691162, |
|
"logps/chosen": -846.26611328125, |
|
"logps/rejected": -2448.76416015625, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5850510001182556, |
|
"rewards/margins": 1.622568130493164, |
|
"rewards/rejected": -2.2076191902160645, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.367486188632446e-06, |
|
"logits/chosen": -2.221585273742676, |
|
"logits/rejected": -2.0297319889068604, |
|
"logps/chosen": -1184.786865234375, |
|
"logps/rejected": -2758.436767578125, |
|
"loss": 0.4455, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9226329922676086, |
|
"rewards/margins": 1.6065568923950195, |
|
"rewards/rejected": -2.5291898250579834, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.276292003092593e-06, |
|
"logits/chosen": -2.196733236312866, |
|
"logits/rejected": -2.057121992111206, |
|
"logps/chosen": -1445.136962890625, |
|
"logps/rejected": -2547.5927734375, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2020736932754517, |
|
"rewards/margins": 1.1371889114379883, |
|
"rewards/rejected": -2.3392627239227295, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1853970071701415e-06, |
|
"logits/chosen": -2.14131498336792, |
|
"logits/rejected": -2.0334861278533936, |
|
"logps/chosen": -913.9793090820312, |
|
"logps/rejected": -1585.7960205078125, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.7021154165267944, |
|
"rewards/margins": 0.7157109975814819, |
|
"rewards/rejected": -1.417826533317566, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0949227648656194e-06, |
|
"logits/chosen": -2.150709629058838, |
|
"logits/rejected": -2.051652431488037, |
|
"logps/chosen": -930.1282348632812, |
|
"logps/rejected": -1828.182861328125, |
|
"loss": 0.4531, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.7185366749763489, |
|
"rewards/margins": 0.9112586975097656, |
|
"rewards/rejected": -1.6297954320907593, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.00499027745888e-06, |
|
"logits/chosen": -2.165860891342163, |
|
"logits/rejected": -1.990290641784668, |
|
"logps/chosen": -1457.848388671875, |
|
"logps/rejected": -3095.86181640625, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2225459814071655, |
|
"rewards/margins": 1.659259557723999, |
|
"rewards/rejected": -2.881805658340454, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.915719821680624e-06, |
|
"logits/chosen": -2.033405303955078, |
|
"logits/rejected": -1.9788004159927368, |
|
"logps/chosen": -1339.5030517578125, |
|
"logps/rejected": -1871.1273193359375, |
|
"loss": 0.4542, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.1169803142547607, |
|
"rewards/margins": 0.5490958094596863, |
|
"rewards/rejected": -1.6660760641098022, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8272307888529276e-06, |
|
"logits/chosen": -2.2021541595458984, |
|
"logits/rejected": -2.0622100830078125, |
|
"logps/chosen": -1060.39404296875, |
|
"logps/rejected": -2731.3662109375, |
|
"loss": 0.4515, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8206619024276733, |
|
"rewards/margins": 1.6797775030136108, |
|
"rewards/rejected": -2.5004396438598633, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.739641525213929e-06, |
|
"logits/chosen": -2.200084686279297, |
|
"logits/rejected": -2.0629191398620605, |
|
"logps/chosen": -1195.5421142578125, |
|
"logps/rejected": -2445.993408203125, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.9617778658866882, |
|
"rewards/margins": 1.294762134552002, |
|
"rewards/rejected": -2.256540298461914, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6530691736402317e-06, |
|
"logits/chosen": -2.1470463275909424, |
|
"logits/rejected": -2.028573513031006, |
|
"logps/chosen": -1541.4970703125, |
|
"logps/rejected": -2454.397705078125, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.3200973272323608, |
|
"rewards/margins": 0.9341692924499512, |
|
"rewards/rejected": -2.2542667388916016, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5676295169786864e-06, |
|
"logits/chosen": -2.1975948810577393, |
|
"logits/rejected": -2.060920476913452, |
|
"logps/chosen": -1427.9593505859375, |
|
"logps/rejected": -3144.806884765625, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.176941156387329, |
|
"rewards/margins": 1.7532637119293213, |
|
"rewards/rejected": -2.9302048683166504, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4834368231970922e-06, |
|
"logits/chosen": -2.156165838241577, |
|
"logits/rejected": -2.045762777328491, |
|
"logps/chosen": -1413.2603759765625, |
|
"logps/rejected": -3491.219970703125, |
|
"loss": 0.4488, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1946003437042236, |
|
"rewards/margins": 2.0828354358673096, |
|
"rewards/rejected": -3.2774360179901123, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4006036925609245e-06, |
|
"logits/chosen": -2.2174124717712402, |
|
"logits/rejected": -2.088347911834717, |
|
"logps/chosen": -1448.646484375, |
|
"logps/rejected": -2385.66259765625, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2005541324615479, |
|
"rewards/margins": 0.9563320875167847, |
|
"rewards/rejected": -2.156886339187622, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3192409070404582e-06, |
|
"logits/chosen": -2.2460074424743652, |
|
"logits/rejected": -2.1426172256469727, |
|
"logps/chosen": -1455.069580078125, |
|
"logps/rejected": -2169.728515625, |
|
"loss": 0.4531, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.21938955783844, |
|
"rewards/margins": 0.7492297887802124, |
|
"rewards/rejected": -1.9686193466186523, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2394572821496953e-06, |
|
"logits/chosen": -2.229182481765747, |
|
"logits/rejected": -2.115177631378174, |
|
"logps/chosen": -1379.844482421875, |
|
"logps/rejected": -2796.632568359375, |
|
"loss": 0.4498, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1495221853256226, |
|
"rewards/margins": 1.4514307975769043, |
|
"rewards/rejected": -2.6009533405303955, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1613595214152713e-06, |
|
"logits/chosen": -2.2372703552246094, |
|
"logits/rejected": -2.1571171283721924, |
|
"logps/chosen": -1203.8697509765625, |
|
"logps/rejected": -1857.9075927734375, |
|
"loss": 0.4523, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.9586626291275024, |
|
"rewards/margins": 0.6804057359695435, |
|
"rewards/rejected": -1.6390682458877563, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0850520736699362e-06, |
|
"logits/chosen": -2.1900107860565186, |
|
"logits/rejected": -2.080841064453125, |
|
"logps/chosen": -1038.609619140625, |
|
"logps/rejected": -1972.8609619140625, |
|
"loss": 0.4589, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8293488621711731, |
|
"rewards/margins": 0.9573895335197449, |
|
"rewards/rejected": -1.7867381572723389, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0106369933615043e-06, |
|
"logits/chosen": -2.208099126815796, |
|
"logits/rejected": -2.082400321960449, |
|
"logps/chosen": -1469.6568603515625, |
|
"logps/rejected": -3010.545654296875, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.243025302886963, |
|
"rewards/margins": 1.5714446306228638, |
|
"rewards/rejected": -2.814469814300537, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.382138040640714e-07, |
|
"logits/chosen": -2.220716953277588, |
|
"logits/rejected": -2.1422343254089355, |
|
"logps/chosen": -1299.326904296875, |
|
"logps/rejected": -2368.89111328125, |
|
"loss": 0.4548, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0415502786636353, |
|
"rewards/margins": 1.1175090074539185, |
|
"rewards/rejected": -2.1590590476989746, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.678793653740633e-07, |
|
"logits/chosen": -2.222707509994507, |
|
"logits/rejected": -2.1047911643981934, |
|
"logps/chosen": -1158.9268798828125, |
|
"logps/rejected": -2246.68212890625, |
|
"loss": 0.4549, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9390741586685181, |
|
"rewards/margins": 1.1018383502960205, |
|
"rewards/rejected": -2.040912389755249, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.997277433690984e-07, |
|
"logits/chosen": -2.187948226928711, |
|
"logits/rejected": -2.105868101119995, |
|
"logps/chosen": -1287.493896484375, |
|
"logps/rejected": -2305.227783203125, |
|
"loss": 0.453, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.0323898792266846, |
|
"rewards/margins": 1.072412133216858, |
|
"rewards/rejected": -2.104801893234253, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.338500848029603e-07, |
|
"logits/chosen": -2.212477445602417, |
|
"logits/rejected": -2.127330780029297, |
|
"logps/chosen": -960.9691162109375, |
|
"logps/rejected": -2517.481689453125, |
|
"loss": 0.4508, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7053281664848328, |
|
"rewards/margins": 1.5753790140151978, |
|
"rewards/rejected": -2.2807071208953857, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.70334495204884e-07, |
|
"logits/chosen": -2.230347156524658, |
|
"logits/rejected": -2.119199275970459, |
|
"logps/chosen": -1013.6222534179688, |
|
"logps/rejected": -2389.435302734375, |
|
"loss": 0.4505, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7983914017677307, |
|
"rewards/margins": 1.369321346282959, |
|
"rewards/rejected": -2.167712688446045, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.092659210462232e-07, |
|
"logits/chosen": -2.2451062202453613, |
|
"logits/rejected": -2.2009005546569824, |
|
"logps/chosen": -1014.0054931640625, |
|
"logps/rejected": -2509.18310546875, |
|
"loss": 0.4428, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.7636991143226624, |
|
"rewards/margins": 1.5256783962249756, |
|
"rewards/rejected": -2.2893776893615723, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.507260361320738e-07, |
|
"logits/chosen": -2.2545554637908936, |
|
"logits/rejected": -2.1696648597717285, |
|
"logps/chosen": -1256.4468994140625, |
|
"logps/rejected": -3355.20849609375, |
|
"loss": 0.4417, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9988948702812195, |
|
"rewards/margins": 2.1163055896759033, |
|
"rewards/rejected": -3.1152002811431885, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.947931323697983e-07, |
|
"logits/chosen": -2.236833333969116, |
|
"logits/rejected": -2.116788625717163, |
|
"logps/chosen": -1157.8489990234375, |
|
"logps/rejected": -2627.938720703125, |
|
"loss": 0.4404, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9315347671508789, |
|
"rewards/margins": 1.4900939464569092, |
|
"rewards/rejected": -2.421628475189209, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.4154201506053985e-07, |
|
"logits/chosen": -2.2656142711639404, |
|
"logits/rejected": -2.1838631629943848, |
|
"logps/chosen": -1444.336181640625, |
|
"logps/rejected": -2568.44873046875, |
|
"loss": 0.4565, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.2109791040420532, |
|
"rewards/margins": 1.1344887018203735, |
|
"rewards/rejected": -2.345468044281006, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.910439028537638e-07, |
|
"logits/chosen": -2.1305015087127686, |
|
"logits/rejected": -2.0338492393493652, |
|
"logps/chosen": -1384.163330078125, |
|
"logps/rejected": -3120.740234375, |
|
"loss": 0.44, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.1574374437332153, |
|
"rewards/margins": 1.7521774768829346, |
|
"rewards/rejected": -2.9096148014068604, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.4336633249862084e-07, |
|
"logits/chosen": -2.1786255836486816, |
|
"logits/rejected": -2.1090734004974365, |
|
"logps/chosen": -1811.431640625, |
|
"logps/rejected": -1879.762939453125, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -1.5657621622085571, |
|
"rewards/margins": 0.12173604965209961, |
|
"rewards/rejected": -1.6874980926513672, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.98573068519539e-07, |
|
"logits/chosen": -2.204667091369629, |
|
"logits/rejected": -2.1335368156433105, |
|
"logps/chosen": -1094.656982421875, |
|
"logps/rejected": -2983.633056640625, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8493936657905579, |
|
"rewards/margins": 1.935779333114624, |
|
"rewards/rejected": -2.785172939300537, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5672401793681854e-07, |
|
"logits/chosen": -2.2261288166046143, |
|
"logits/rejected": -2.156919002532959, |
|
"logps/chosen": -1517.5009765625, |
|
"logps/rejected": -2918.010986328125, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2897742986679077, |
|
"rewards/margins": 1.39667546749115, |
|
"rewards/rejected": -2.6864495277404785, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.178751501463036e-07, |
|
"logits/chosen": -2.181530475616455, |
|
"logits/rejected": -2.1404881477355957, |
|
"logps/chosen": -1569.401611328125, |
|
"logps/rejected": -2147.50927734375, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.3682358264923096, |
|
"rewards/margins": 0.5883899331092834, |
|
"rewards/rejected": -1.9566256999969482, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.820784220652766e-07, |
|
"logits/chosen": -2.213731050491333, |
|
"logits/rejected": -2.1309895515441895, |
|
"logps/chosen": -1591.1544189453125, |
|
"logps/rejected": -2607.8837890625, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.339202642440796, |
|
"rewards/margins": 1.0517059564590454, |
|
"rewards/rejected": -2.390908718109131, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.4938170864468636e-07, |
|
"logits/chosen": -2.1618194580078125, |
|
"logits/rejected": -2.054898738861084, |
|
"logps/chosen": -1653.2855224609375, |
|
"logps/rejected": -3330.978515625, |
|
"loss": 0.453, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.4334652423858643, |
|
"rewards/margins": 1.6885372400283813, |
|
"rewards/rejected": -3.122002601623535, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1982873884064466e-07, |
|
"logits/chosen": -2.2253754138946533, |
|
"logits/rejected": -2.132044792175293, |
|
"logps/chosen": -1268.3460693359375, |
|
"logps/rejected": -2814.267578125, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.069705843925476, |
|
"rewards/margins": 1.5451180934906006, |
|
"rewards/rejected": -2.614823818206787, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.345903713082305e-08, |
|
"logits/chosen": -2.2452187538146973, |
|
"logits/rejected": -2.1340882778167725, |
|
"logps/chosen": -1735.882568359375, |
|
"logps/rejected": -2934.236328125, |
|
"loss": 0.4454, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.4951032400131226, |
|
"rewards/margins": 1.2158784866333008, |
|
"rewards/rejected": -2.710981845855713, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.030787065396866e-08, |
|
"logits/chosen": -2.2767229080200195, |
|
"logits/rejected": -2.1917612552642822, |
|
"logps/chosen": -1188.2750244140625, |
|
"logps/rejected": -2986.377197265625, |
|
"loss": 0.4469, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9747017025947571, |
|
"rewards/margins": 1.8013055324554443, |
|
"rewards/rejected": -2.7760071754455566, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.0406202043228604e-08, |
|
"logits/chosen": -2.2052557468414307, |
|
"logits/rejected": -2.0551769733428955, |
|
"logps/chosen": -1045.6455078125, |
|
"logps/rejected": -2074.9453125, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.8170153498649597, |
|
"rewards/margins": 1.0543075799942017, |
|
"rewards/rejected": -1.8713228702545166, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.378064801637687e-08, |
|
"logits/chosen": -2.2373902797698975, |
|
"logits/rejected": -2.1471071243286133, |
|
"logps/chosen": -1441.9759521484375, |
|
"logps/rejected": -3050.029296875, |
|
"loss": 0.4416, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1936613321304321, |
|
"rewards/margins": 1.6291347742080688, |
|
"rewards/rejected": -2.822796106338501, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0453443778310766e-08, |
|
"logits/chosen": -2.1485986709594727, |
|
"logits/rejected": -2.0270209312438965, |
|
"logps/chosen": -1386.749755859375, |
|
"logps/rejected": -3182.661865234375, |
|
"loss": 0.4479, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.131797432899475, |
|
"rewards/margins": 1.841048002243042, |
|
"rewards/rejected": -2.9728455543518066, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0442413283435759e-08, |
|
"logits/chosen": -2.273899793624878, |
|
"logits/rejected": -2.1478586196899414, |
|
"logps/chosen": -800.504150390625, |
|
"logps/rejected": -2901.39501953125, |
|
"loss": 0.4385, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5838597416877747, |
|
"rewards/margins": 2.1213278770446777, |
|
"rewards/rejected": -2.7051875591278076, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.760945397705828e-09, |
|
"logits/chosen": -2.2581982612609863, |
|
"logits/rejected": -2.1440868377685547, |
|
"logps/chosen": -1637.424072265625, |
|
"logps/rejected": -2622.310791015625, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.387995958328247, |
|
"rewards/margins": 1.0160502195358276, |
|
"rewards/rejected": -2.404046058654785, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.1797599220405605e-10, |
|
"logits/chosen": -2.2077736854553223, |
|
"logits/rejected": -2.0697312355041504, |
|
"logps/chosen": -1709.1343994140625, |
|
"logps/rejected": -3128.599853515625, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.506415843963623, |
|
"rewards/margins": 1.4321677684783936, |
|
"rewards/rejected": -2.9385836124420166, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.46542558670043943, |
|
"train_runtime": 22527.5186, |
|
"train_samples_per_second": 2.714, |
|
"train_steps_per_second": 0.042 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|