{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968652037617555,
  "eval_steps": 500,
  "global_step": 159,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.843719482421875,
      "logits/rejected": -2.8718748092651367,
      "logps/chosen": -340.24505615234375,
      "logps/pi_response": -88.81813049316406,
      "logps/ref_response": -88.81813049316406,
      "logps/rejected": -141.60296630859375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.775027275085449,
      "logits/rejected": -2.7582144737243652,
      "logps/chosen": -225.69427490234375,
      "logps/pi_response": -73.32665252685547,
      "logps/ref_response": -73.0557632446289,
      "logps/rejected": -127.79769134521484,
      "loss": 0.6918,
      "rewards/accuracies": 0.5486111044883728,
      "rewards/chosen": 0.0009001567959785461,
      "rewards/margins": 0.0027927933260798454,
      "rewards/rejected": -0.0018926364136859775,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -2.7578864097595215,
      "logits/rejected": -2.7449183464050293,
      "logps/chosen": -216.22109985351562,
      "logps/pi_response": -79.10755920410156,
      "logps/ref_response": -73.02125549316406,
      "logps/rejected": -117.88703918457031,
      "loss": 0.6655,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.005766990128904581,
      "rewards/margins": 0.042582910507917404,
      "rewards/rejected": -0.03681592270731926,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -2.636462688446045,
      "logits/rejected": -2.6021816730499268,
      "logps/chosen": -240.2003936767578,
      "logps/pi_response": -100.06871032714844,
      "logps/ref_response": -68.24726867675781,
      "logps/rejected": -125.3204345703125,
      "loss": 0.6227,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.08825792372226715,
      "rewards/margins": 0.159133642911911,
      "rewards/rejected": -0.24739158153533936,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -2.5370068550109863,
      "logits/rejected": -2.5128865242004395,
      "logps/chosen": -271.98651123046875,
      "logps/pi_response": -144.71682739257812,
      "logps/ref_response": -75.6551742553711,
      "logps/rejected": -181.0801544189453,
      "loss": 0.5839,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.23095576465129852,
      "rewards/margins": 0.38431650400161743,
      "rewards/rejected": -0.6152722239494324,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -2.5748300552368164,
      "logits/rejected": -2.5561716556549072,
      "logps/chosen": -276.7567138671875,
      "logps/pi_response": -160.7132110595703,
      "logps/ref_response": -71.23826599121094,
      "logps/rejected": -182.82472229003906,
      "loss": 0.5987,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.3602275252342224,
      "rewards/margins": 0.41353344917297363,
      "rewards/rejected": -0.7737610340118408,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -2.5654828548431396,
      "logits/rejected": -2.519744634628296,
      "logps/chosen": -221.7403106689453,
      "logps/pi_response": -161.64913940429688,
      "logps/ref_response": -67.74427795410156,
      "logps/rejected": -205.1297607421875,
      "loss": 0.5637,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.35707658529281616,
      "rewards/margins": 0.4391842782497406,
      "rewards/rejected": -0.7962608337402344,
      "step": 60
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -2.5978214740753174,
      "logits/rejected": -2.557990550994873,
      "logps/chosen": -270.705810546875,
      "logps/pi_response": -191.92599487304688,
      "logps/ref_response": -73.56436157226562,
      "logps/rejected": -205.6122589111328,
      "loss": 0.5327,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.4569978713989258,
      "rewards/margins": 0.58502596616745,
      "rewards/rejected": -1.0420238971710205,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -2.6471352577209473,
      "logits/rejected": -2.6175568103790283,
      "logps/chosen": -273.21337890625,
      "logps/pi_response": -190.89492797851562,
      "logps/ref_response": -77.31620025634766,
      "logps/rejected": -209.2509307861328,
      "loss": 0.5247,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.4046536982059479,
      "rewards/margins": 0.6309649348258972,
      "rewards/rejected": -1.035618543624878,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -2.5935592651367188,
      "logits/rejected": -2.5323500633239746,
      "logps/chosen": -294.3185119628906,
      "logps/pi_response": -193.82952880859375,
      "logps/ref_response": -72.20295715332031,
      "logps/rejected": -214.9334716796875,
      "loss": 0.5214,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.4483931064605713,
      "rewards/margins": 0.6660177707672119,
      "rewards/rejected": -1.1144107580184937,
      "step": 90
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -2.584444522857666,
      "logits/rejected": -2.538541078567505,
      "logps/chosen": -310.3904113769531,
      "logps/pi_response": -218.18429565429688,
      "logps/ref_response": -74.52151489257812,
      "logps/rejected": -236.4603271484375,
      "loss": 0.524,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.604050874710083,
      "rewards/margins": 0.6805473566055298,
      "rewards/rejected": -1.2845981121063232,
      "step": 100
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -2.508607864379883,
      "logits/rejected": -2.4556093215942383,
      "logps/chosen": -275.2364501953125,
      "logps/pi_response": -222.62771606445312,
      "logps/ref_response": -63.37713623046875,
      "logps/rejected": -252.931640625,
      "loss": 0.4936,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.8243808746337891,
      "rewards/margins": 0.6452677845954895,
      "rewards/rejected": -1.4696485996246338,
      "step": 110
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -2.523491621017456,
      "logits/rejected": -2.471564292907715,
      "logps/chosen": -280.9041748046875,
      "logps/pi_response": -221.3182373046875,
      "logps/ref_response": -66.3117446899414,
      "logps/rejected": -240.2959442138672,
      "loss": 0.483,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -0.5750901699066162,
      "rewards/margins": 0.953150749206543,
      "rewards/rejected": -1.5282410383224487,
      "step": 120
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -2.496025800704956,
      "logits/rejected": -2.4560306072235107,
      "logps/chosen": -316.5722961425781,
      "logps/pi_response": -236.9508056640625,
      "logps/ref_response": -76.07049560546875,
      "logps/rejected": -261.31170654296875,
      "loss": 0.4892,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.6082834601402283,
      "rewards/margins": 0.9130091667175293,
      "rewards/rejected": -1.5212925672531128,
      "step": 130
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -2.502040147781372,
      "logits/rejected": -2.4415221214294434,
      "logps/chosen": -303.0337219238281,
      "logps/pi_response": -231.24246215820312,
      "logps/ref_response": -78.19743347167969,
      "logps/rejected": -247.8349151611328,
      "loss": 0.4841,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.6394041776657104,
      "rewards/margins": 0.7987918257713318,
      "rewards/rejected": -1.4381959438323975,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -2.5210301876068115,
      "logits/rejected": -2.4584171772003174,
      "logps/chosen": -301.7514343261719,
      "logps/pi_response": -243.6031494140625,
      "logps/ref_response": -77.64109802246094,
      "logps/rejected": -281.96185302734375,
      "loss": 0.489,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.6782695055007935,
      "rewards/margins": 0.8968443870544434,
      "rewards/rejected": -1.5751138925552368,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 159,
      "total_flos": 0.0,
      "train_loss": 0.5467530106598476,
      "train_runtime": 4156.701,
      "train_samples_per_second": 4.902,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 10,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}