{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.957146863129987, "eval_steps": 500, "global_step": 14500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10197058148724093, "grad_norm": 3.7134974002838135, "learning_rate": 4.8313957441022504e-05, "log_odds_chosen": 0.2222207486629486, "log_odds_ratio": -0.6578092575073242, "logits/chosen": -0.44847217202186584, "logits/rejected": -0.7988718152046204, "logps/chosen": -1.3054556846618652, "logps/rejected": -1.482597827911377, "loss": 1.2524, "nll_loss": 1.1866657733917236, "rewards/accuracies": 0.5920000076293945, "rewards/chosen": -0.13054557144641876, "rewards/margins": 0.01771421544253826, "rewards/rejected": -0.14825978875160217, "step": 500 }, { "epoch": 0.20394116297448187, "grad_norm": 2.004502058029175, "learning_rate": 4.6614317764633906e-05, "log_odds_chosen": 0.35723409056663513, "log_odds_ratio": -0.629149317741394, "logits/chosen": -1.3414314985275269, "logits/rejected": -1.5448620319366455, "logps/chosen": -1.255893588066101, "logps/rejected": -1.5381147861480713, "loss": 1.0135, "nll_loss": 0.9506049752235413, "rewards/accuracies": 0.609000027179718, "rewards/chosen": -0.12558937072753906, "rewards/margins": 0.028222110122442245, "rewards/rejected": -0.1538114696741104, "step": 1000 }, { "epoch": 0.3059117444617228, "grad_norm": 1.5967992544174194, "learning_rate": 4.4914678088245294e-05, "log_odds_chosen": 0.45203697681427, "log_odds_ratio": -0.6168015003204346, "logits/chosen": -1.578227162361145, "logits/rejected": -1.7268493175506592, "logps/chosen": -1.1664234399795532, "logps/rejected": -1.515409231185913, "loss": 0.9167, "nll_loss": 0.8550541400909424, "rewards/accuracies": 0.6147500276565552, "rewards/chosen": -0.11664234101772308, "rewards/margins": 0.034898579120635986, "rewards/rejected": -0.15154090523719788, "step": 1500 }, { "epoch": 0.40788232594896373, "grad_norm": 1.4804637432098389, "learning_rate": 4.321503841185669e-05, "log_odds_chosen": 0.5433089733123779, "log_odds_ratio": -0.605213463306427, "logits/chosen": -1.7934901714324951, "logits/rejected": -1.9556081295013428, "logps/chosen": -1.108548879623413, "logps/rejected": -1.5245370864868164, "loss": 0.8516, "nll_loss": 0.7910776734352112, "rewards/accuracies": 0.628250002861023, "rewards/chosen": -0.11085488647222519, "rewards/margins": 0.04159883037209511, "rewards/rejected": -0.1524537056684494, "step": 2000 }, { "epoch": 0.5098529074362047, "grad_norm": 1.2357059717178345, "learning_rate": 4.151539873546808e-05, "log_odds_chosen": 0.6769158244132996, "log_odds_ratio": -0.5792952179908752, "logits/chosen": -1.9547886848449707, "logits/rejected": -2.1045212745666504, "logps/chosen": -1.04926335811615, "logps/rejected": -1.554589033126831, "loss": 0.7987, "nll_loss": 0.7408133745193481, "rewards/accuracies": 0.656499981880188, "rewards/chosen": -0.10492634028196335, "rewards/margins": 0.050532568246126175, "rewards/rejected": -0.15545891225337982, "step": 2500 }, { "epoch": 0.6118234889234456, "grad_norm": 1.753408432006836, "learning_rate": 3.981575905907948e-05, "log_odds_chosen": 0.7549012303352356, "log_odds_ratio": -0.5683782696723938, "logits/chosen": -2.111973285675049, "logits/rejected": -2.268782615661621, "logps/chosen": -1.0075668096542358, "logps/rejected": -1.5643786191940308, "loss": 0.7492, "nll_loss": 0.6923843026161194, "rewards/accuracies": 0.640999972820282, "rewards/chosen": -0.1007566824555397, "rewards/margins": 0.055681198835372925, "rewards/rejected": -0.15643785893917084, "step": 3000 }, { "epoch": 0.7137940704106865, "grad_norm": 1.8108484745025635, "learning_rate": 3.811611938269087e-05, "log_odds_chosen": 0.7971724271774292, "log_odds_ratio": -0.5602663159370422, "logits/chosen": -2.263519525527954, "logits/rejected": -2.3989477157592773, "logps/chosen": -0.9775514006614685, "logps/rejected": -1.5509178638458252, "loss": 0.7398, "nll_loss": 0.6837689280509949, "rewards/accuracies": 0.6542500257492065, "rewards/chosen": -0.09775513410568237, "rewards/margins": 0.05733664333820343, "rewards/rejected": -0.1550917774438858, "step": 3500 }, { "epoch": 0.8157646518979275, "grad_norm": 0.9750083684921265, "learning_rate": 3.641647970630227e-05, "log_odds_chosen": 0.8391203880310059, "log_odds_ratio": -0.5608585476875305, "logits/chosen": -2.2849109172821045, "logits/rejected": -2.413522720336914, "logps/chosen": -0.9366417527198792, "logps/rejected": -1.5379432439804077, "loss": 0.708, "nll_loss": 0.6518718600273132, "rewards/accuracies": 0.6554999947547913, "rewards/chosen": -0.09366416931152344, "rewards/margins": 0.06013017147779465, "rewards/rejected": -0.1537943333387375, "step": 4000 }, { "epoch": 0.9177352333851684, "grad_norm": 1.1859960556030273, "learning_rate": 3.471684002991366e-05, "log_odds_chosen": 0.9322497844696045, "log_odds_ratio": -0.5479665398597717, "logits/chosen": -2.4118337631225586, "logits/rejected": -2.5123636722564697, "logps/chosen": -0.9224444031715393, "logps/rejected": -1.572380781173706, "loss": 0.6784, "nll_loss": 0.6236190795898438, "rewards/accuracies": 0.6604999899864197, "rewards/chosen": -0.09224444627761841, "rewards/margins": 0.06499364972114563, "rewards/rejected": -0.15723808109760284, "step": 4500 }, { "epoch": 1.0197058148724094, "grad_norm": 1.545590877532959, "learning_rate": 3.301720035352505e-05, "log_odds_chosen": 1.231202244758606, "log_odds_ratio": -0.4899790585041046, "logits/chosen": -2.496483564376831, "logits/rejected": -2.5658133029937744, "logps/chosen": -0.8024629950523376, "logps/rejected": -1.6277039051055908, "loss": 0.5995, "nll_loss": 0.5504766702651978, "rewards/accuracies": 0.7229999899864197, "rewards/chosen": -0.08024629950523376, "rewards/margins": 0.08252407610416412, "rewards/rejected": -0.16277039051055908, "step": 5000 }, { "epoch": 1.1216763963596503, "grad_norm": 1.1230560541152954, "learning_rate": 3.132095995648923e-05, "log_odds_chosen": 2.3458921909332275, "log_odds_ratio": -0.2528613209724426, "logits/chosen": -2.7195615768432617, "logits/rejected": -2.732959270477295, "logps/chosen": -0.4438183605670929, "logps/rejected": -1.9251257181167603, "loss": 0.3371, "nll_loss": 0.311859130859375, "rewards/accuracies": 0.9585000276565552, "rewards/chosen": -0.04438183456659317, "rewards/margins": 0.14813074469566345, "rewards/rejected": -0.19251257181167603, "step": 5500 }, { "epoch": 1.2236469778468912, "grad_norm": 1.688524603843689, "learning_rate": 2.9621320280100618e-05, "log_odds_chosen": 2.3827967643737793, "log_odds_ratio": -0.25957345962524414, "logits/chosen": -2.720367193222046, "logits/rejected": -2.767230987548828, "logps/chosen": -0.44125357270240784, "logps/rejected": -1.953121304512024, "loss": 0.334, "nll_loss": 0.3080129623413086, "rewards/accuracies": 0.9617499709129333, "rewards/chosen": -0.04412535950541496, "rewards/margins": 0.1511867791414261, "rewards/rejected": -0.19531212747097015, "step": 6000 }, { "epoch": 1.3256175593341322, "grad_norm": 1.2772260904312134, "learning_rate": 2.792507988306479e-05, "log_odds_chosen": 2.2994472980499268, "log_odds_ratio": -0.26726964116096497, "logits/chosen": -2.7083919048309326, "logits/rejected": -2.772411823272705, "logps/chosen": -0.45140743255615234, "logps/rejected": -1.911563754081726, "loss": 0.3471, "nll_loss": 0.32037463784217834, "rewards/accuracies": 0.9572499990463257, "rewards/chosen": -0.045140739530324936, "rewards/margins": 0.14601562917232513, "rewards/rejected": -0.19115637242794037, "step": 6500 }, { "epoch": 1.427588140821373, "grad_norm": 1.3095237016677856, "learning_rate": 2.622544020667619e-05, "log_odds_chosen": 2.344271659851074, "log_odds_ratio": -0.27038824558258057, "logits/chosen": -2.74202823638916, "logits/rejected": -2.7950618267059326, "logps/chosen": -0.42455798387527466, "logps/rejected": -1.880998134613037, "loss": 0.3258, "nll_loss": 0.29875221848487854, "rewards/accuracies": 0.952750027179718, "rewards/chosen": -0.042455799877643585, "rewards/margins": 0.14564399421215057, "rewards/rejected": -0.18809978663921356, "step": 7000 }, { "epoch": 1.529558722308614, "grad_norm": 1.1868637800216675, "learning_rate": 2.452580053028758e-05, "log_odds_chosen": 2.3831584453582764, "log_odds_ratio": -0.26601019501686096, "logits/chosen": -2.7699034214019775, "logits/rejected": -2.8358941078186035, "logps/chosen": -0.43710044026374817, "logps/rejected": -1.9111379384994507, "loss": 0.3313, "nll_loss": 0.3046627938747406, "rewards/accuracies": 0.9614999890327454, "rewards/chosen": -0.043710045516490936, "rewards/margins": 0.147403746843338, "rewards/rejected": -0.19111379981040955, "step": 7500 }, { "epoch": 1.631529303795855, "grad_norm": 1.8944728374481201, "learning_rate": 2.2826160853898975e-05, "log_odds_chosen": 2.470182180404663, "log_odds_ratio": -0.2623186409473419, "logits/chosen": -2.7561872005462646, "logits/rejected": -2.7932546138763428, "logps/chosen": -0.43067559599876404, "logps/rejected": -1.9674099683761597, "loss": 0.325, "nll_loss": 0.2987590432167053, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.043067559599876404, "rewards/margins": 0.1536734402179718, "rewards/rejected": -0.196740984916687, "step": 8000 }, { "epoch": 1.7334998852830958, "grad_norm": 1.0028513669967651, "learning_rate": 2.112652117751037e-05, "log_odds_chosen": 2.5264878273010254, "log_odds_ratio": -0.25732529163360596, "logits/chosen": -2.8135437965393066, "logits/rejected": -2.852220058441162, "logps/chosen": -0.41469958424568176, "logps/rejected": -1.949130654335022, "loss": 0.3154, "nll_loss": 0.2897038757801056, "rewards/accuracies": 0.9607499837875366, "rewards/chosen": -0.04146995395421982, "rewards/margins": 0.1534431129693985, "rewards/rejected": -0.1949130743741989, "step": 8500 }, { "epoch": 1.8354704667703368, "grad_norm": 0.9701994061470032, "learning_rate": 1.9426881501121764e-05, "log_odds_chosen": 2.3967556953430176, "log_odds_ratio": -0.27115362882614136, "logits/chosen": -2.7224009037017822, "logits/rejected": -2.7754287719726562, "logps/chosen": -0.4164165258407593, "logps/rejected": -1.8547587394714355, "loss": 0.3216, "nll_loss": 0.29452264308929443, "rewards/accuracies": 0.9505000114440918, "rewards/chosen": -0.041641656309366226, "rewards/margins": 0.1438342034816742, "rewards/rejected": -0.18547587096691132, "step": 9000 }, { "epoch": 1.9374410482575777, "grad_norm": 0.7663633823394775, "learning_rate": 1.7727241824733156e-05, "log_odds_chosen": 2.5609018802642822, "log_odds_ratio": -0.26221731305122375, "logits/chosen": -2.8092148303985596, "logits/rejected": -2.836301803588867, "logps/chosen": -0.3934709429740906, "logps/rejected": -1.921687364578247, "loss": 0.3055, "nll_loss": 0.27924445271492004, "rewards/accuracies": 0.9557499885559082, "rewards/chosen": -0.0393470898270607, "rewards/margins": 0.1528216451406479, "rewards/rejected": -0.192168727517128, "step": 9500 }, { "epoch": 2.039411629744819, "grad_norm": 0.6147420406341553, "learning_rate": 1.602760214834455e-05, "log_odds_chosen": 3.2184579372406006, "log_odds_ratio": -0.18695417046546936, "logits/chosen": -3.0869991779327393, "logits/rejected": -3.0871739387512207, "logps/chosen": -0.3052729070186615, "logps/rejected": -2.1187493801116943, "loss": 0.2355, "nll_loss": 0.2168271690607071, "rewards/accuracies": 0.9782500267028809, "rewards/chosen": -0.03052728995680809, "rewards/margins": 0.18134763836860657, "rewards/rejected": -0.21187493205070496, "step": 10000 }, { "epoch": 2.1413822112320595, "grad_norm": 0.8968594074249268, "learning_rate": 1.4327962471955947e-05, "log_odds_chosen": 4.477025032043457, "log_odds_ratio": -0.07086528092622757, "logits/chosen": -3.4468417167663574, "logits/rejected": -3.4328184127807617, "logps/chosen": -0.1490449756383896, "logps/rejected": -2.5894744396209717, "loss": 0.1219, "nll_loss": 0.11482993513345718, "rewards/accuracies": 0.9940000176429749, "rewards/chosen": -0.014904499053955078, "rewards/margins": 0.24404291808605194, "rewards/rejected": -0.2589474618434906, "step": 10500 }, { "epoch": 2.2433527927193007, "grad_norm": 0.7543737292289734, "learning_rate": 1.262832279556734e-05, "log_odds_chosen": 4.517333030700684, "log_odds_ratio": -0.07165472209453583, "logits/chosen": -3.4554872512817383, "logits/rejected": -3.4509389400482178, "logps/chosen": -0.151213601231575, "logps/rejected": -2.6054680347442627, "loss": 0.1234, "nll_loss": 0.11624418199062347, "rewards/accuracies": 0.9909999966621399, "rewards/chosen": -0.015121362172067165, "rewards/margins": 0.24542541801929474, "rewards/rejected": -0.2605467736721039, "step": 11000 }, { "epoch": 2.3453233742065414, "grad_norm": 0.7870352864265442, "learning_rate": 1.0928683119178734e-05, "log_odds_chosen": 4.504143238067627, "log_odds_ratio": -0.0680876150727272, "logits/chosen": -3.3482730388641357, "logits/rejected": -3.362037181854248, "logps/chosen": -0.15102657675743103, "logps/rejected": -2.60615873336792, "loss": 0.1227, "nll_loss": 0.11586029082536697, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.015102657489478588, "rewards/margins": 0.24551323056221008, "rewards/rejected": -0.26061588525772095, "step": 11500 }, { "epoch": 2.4472939556937825, "grad_norm": 0.621370792388916, "learning_rate": 9.22904344279013e-06, "log_odds_chosen": 4.464700698852539, "log_odds_ratio": -0.07176685333251953, "logits/chosen": -3.3470752239227295, "logits/rejected": -3.3573222160339355, "logps/chosen": -0.14544738829135895, "logps/rejected": -2.5615859031677246, "loss": 0.12, "nll_loss": 0.11279115825891495, "rewards/accuracies": 0.9892500042915344, "rewards/chosen": -0.014544738456606865, "rewards/margins": 0.24161386489868164, "rewards/rejected": -0.25615859031677246, "step": 12000 }, { "epoch": 2.549264537181023, "grad_norm": 0.8108669519424438, "learning_rate": 7.529403766401523e-06, "log_odds_chosen": 4.566464424133301, "log_odds_ratio": -0.06760641187429428, "logits/chosen": -3.3751792907714844, "logits/rejected": -3.371516227722168, "logps/chosen": -0.14034530520439148, "logps/rejected": -2.578918933868408, "loss": 0.1152, "nll_loss": 0.1084122508764267, "rewards/accuracies": 0.9932500123977661, "rewards/chosen": -0.014034531079232693, "rewards/margins": 0.24385739862918854, "rewards/rejected": -0.2578918933868408, "step": 12500 }, { "epoch": 2.6512351186682643, "grad_norm": 1.0674421787261963, "learning_rate": 5.829764090012918e-06, "log_odds_chosen": 4.609077453613281, "log_odds_ratio": -0.06549856811761856, "logits/chosen": -3.320563316345215, "logits/rejected": -3.3299055099487305, "logps/chosen": -0.1383795589208603, "logps/rejected": -2.625169515609741, "loss": 0.1144, "nll_loss": 0.10785890370607376, "rewards/accuracies": 0.9932500123977661, "rewards/chosen": -0.013837955892086029, "rewards/margins": 0.2486789971590042, "rewards/rejected": -0.26251694560050964, "step": 13000 }, { "epoch": 2.753205700155505, "grad_norm": 0.9907452464103699, "learning_rate": 4.1301244136243116e-06, "log_odds_chosen": 4.6315083503723145, "log_odds_ratio": -0.06764637678861618, "logits/chosen": -3.35665225982666, "logits/rejected": -3.365429639816284, "logps/chosen": -0.14084672927856445, "logps/rejected": -2.6286959648132324, "loss": 0.1155, "nll_loss": 0.10870947688817978, "rewards/accuracies": 0.9917500019073486, "rewards/chosen": -0.01408467348664999, "rewards/margins": 0.24878491461277008, "rewards/rejected": -0.26286956667900085, "step": 13500 }, { "epoch": 2.855176281642746, "grad_norm": 1.0650596618652344, "learning_rate": 2.4304847372357063e-06, "log_odds_chosen": 4.683770656585693, "log_odds_ratio": -0.06298153102397919, "logits/chosen": -3.343515634536743, "logits/rejected": -3.349973201751709, "logps/chosen": -0.1364256739616394, "logps/rejected": -2.6349427700042725, "loss": 0.111, "nll_loss": 0.10465656220912933, "rewards/accuracies": 0.9919999837875366, "rewards/chosen": -0.013642566278576851, "rewards/margins": 0.24985171854496002, "rewards/rejected": -0.26349425315856934, "step": 14000 }, { "epoch": 2.957146863129987, "grad_norm": 0.7051740288734436, "learning_rate": 7.308450608471004e-07, "log_odds_chosen": 4.733154296875, "log_odds_ratio": -0.06264213472604752, "logits/chosen": -3.384558916091919, "logits/rejected": -3.383835554122925, "logps/chosen": -0.13168923556804657, "logps/rejected": -2.653200149536133, "loss": 0.1086, "nll_loss": 0.10234662145376205, "rewards/accuracies": 0.9934999942779541, "rewards/chosen": -0.01316892635077238, "rewards/margins": 0.2521510720252991, "rewards/rejected": -0.2653200030326843, "step": 14500 } ], "logging_steps": 500, "max_steps": 14709, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }