|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984, |
|
"eval_steps": 500, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 21.712186498887842, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -1.4551665782928467, |
|
"logits/rejected": -1.606083869934082, |
|
"logps/chosen": -144.822265625, |
|
"logps/pi_response": -243.71868896484375, |
|
"logps/ref_response": -243.71868896484375, |
|
"logps/rejected": -162.54443359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 15.364437403667353, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.8610674142837524, |
|
"logits/rejected": -1.8439589738845825, |
|
"logps/chosen": -181.88343811035156, |
|
"logps/pi_response": -265.68951416015625, |
|
"logps/ref_response": -263.9686584472656, |
|
"logps/rejected": -183.3703155517578, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4652777910232544, |
|
"rewards/chosen": -0.005265166517347097, |
|
"rewards/margins": -0.0010041914647445083, |
|
"rewards/rejected": -0.004260974936187267, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 18.345421633966485, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": -1.6329383850097656, |
|
"logits/rejected": -1.6391578912734985, |
|
"logps/chosen": -178.48463439941406, |
|
"logps/pi_response": -288.0892639160156, |
|
"logps/ref_response": -259.4861755371094, |
|
"logps/rejected": -181.0362091064453, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.13044723868370056, |
|
"rewards/margins": 0.01964881829917431, |
|
"rewards/rejected": -0.1500960886478424, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 17.13627597968646, |
|
"learning_rate": 4.877641290737883e-07, |
|
"logits/chosen": -1.431341290473938, |
|
"logits/rejected": -1.421644687652588, |
|
"logps/chosen": -222.0276641845703, |
|
"logps/pi_response": -336.2035827636719, |
|
"logps/ref_response": -273.8411865234375, |
|
"logps/rejected": -218.7247314453125, |
|
"loss": 0.6957, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -0.4222361147403717, |
|
"rewards/margins": 0.015909332782030106, |
|
"rewards/rejected": -0.43814539909362793, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 14.253311206515136, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": -1.4934971332550049, |
|
"logits/rejected": -1.447249412536621, |
|
"logps/chosen": -192.37478637695312, |
|
"logps/pi_response": -272.644775390625, |
|
"logps/ref_response": -250.92996215820312, |
|
"logps/rejected": -193.36312866210938, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.19741061329841614, |
|
"rewards/margins": 0.010645559057593346, |
|
"rewards/rejected": -0.20805618166923523, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 16.796841718291038, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"logits/chosen": -1.7099899053573608, |
|
"logits/rejected": -1.7535244226455688, |
|
"logps/chosen": -194.2742462158203, |
|
"logps/pi_response": -277.76318359375, |
|
"logps/ref_response": -266.2118835449219, |
|
"logps/rejected": -201.6314239501953, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.14295700192451477, |
|
"rewards/margins": 0.017321351915597916, |
|
"rewards/rejected": -0.1602783501148224, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 14.319074470267418, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": -1.2345731258392334, |
|
"logits/rejected": -1.2892169952392578, |
|
"logps/chosen": -206.3093719482422, |
|
"logps/pi_response": -289.80902099609375, |
|
"logps/ref_response": -256.5821533203125, |
|
"logps/rejected": -212.19924926757812, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": -0.30671426653862, |
|
"rewards/margins": 0.010099029168486595, |
|
"rewards/rejected": -0.31681329011917114, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 24.50253677275901, |
|
"learning_rate": 3.378437060203357e-07, |
|
"logits/chosen": -1.03169584274292, |
|
"logits/rejected": -1.021615743637085, |
|
"logps/chosen": -221.443359375, |
|
"logps/pi_response": -319.42242431640625, |
|
"logps/ref_response": -262.21759033203125, |
|
"logps/rejected": -225.0926971435547, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -0.44103240966796875, |
|
"rewards/margins": 0.028266970068216324, |
|
"rewards/rejected": -0.4692993760108948, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 19.526479759830394, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": -0.8754630088806152, |
|
"logits/rejected": -0.9458340406417847, |
|
"logps/chosen": -225.5301971435547, |
|
"logps/pi_response": -346.0572204589844, |
|
"logps/ref_response": -269.8856201171875, |
|
"logps/rejected": -233.25045776367188, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.513852596282959, |
|
"rewards/margins": 0.01622053235769272, |
|
"rewards/rejected": -0.5300731658935547, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 21.868311725000435, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits/chosen": -0.814942479133606, |
|
"logits/rejected": -0.8241022825241089, |
|
"logps/chosen": -234.712646484375, |
|
"logps/pi_response": -357.76568603515625, |
|
"logps/ref_response": -260.8216552734375, |
|
"logps/rejected": -250.9972686767578, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.6007817983627319, |
|
"rewards/margins": 0.07638835906982422, |
|
"rewards/rejected": -0.6771702170372009, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 20.43299106630643, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": -0.897548496723175, |
|
"logits/rejected": -0.932096004486084, |
|
"logps/chosen": -236.4069061279297, |
|
"logps/pi_response": -367.8551940917969, |
|
"logps/ref_response": -274.1803283691406, |
|
"logps/rejected": -245.63119506835938, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -0.5783185958862305, |
|
"rewards/margins": 0.0737391859292984, |
|
"rewards/rejected": -0.6520577669143677, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 20.07983919936344, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"logits/chosen": -0.7010077834129333, |
|
"logits/rejected": -0.7780792713165283, |
|
"logps/chosen": -239.2875518798828, |
|
"logps/pi_response": -375.39691162109375, |
|
"logps/ref_response": -266.8659362792969, |
|
"logps/rejected": -249.2735595703125, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.6955354809761047, |
|
"rewards/margins": 0.06171605736017227, |
|
"rewards/rejected": -0.7572515606880188, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 18.440065783036147, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": -0.7951020002365112, |
|
"logits/rejected": -0.8204092979431152, |
|
"logps/chosen": -261.5052795410156, |
|
"logps/pi_response": -384.1082458496094, |
|
"logps/ref_response": -273.2558288574219, |
|
"logps/rejected": -263.2559509277344, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.7765410542488098, |
|
"rewards/margins": 0.03736639395356178, |
|
"rewards/rejected": -0.8139075040817261, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 20.63429522696927, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"logits/chosen": -0.6491920351982117, |
|
"logits/rejected": -0.6609460711479187, |
|
"logps/chosen": -245.2913055419922, |
|
"logps/pi_response": -372.6864929199219, |
|
"logps/ref_response": -255.30258178710938, |
|
"logps/rejected": -250.04989624023438, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -0.7680090665817261, |
|
"rewards/margins": 0.01968952640891075, |
|
"rewards/rejected": -0.78769850730896, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 18.194304704086697, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": -0.8394004702568054, |
|
"logits/rejected": -0.8077179789543152, |
|
"logps/chosen": -243.73538208007812, |
|
"logps/pi_response": -391.2145690917969, |
|
"logps/ref_response": -279.54559326171875, |
|
"logps/rejected": -255.0926513671875, |
|
"loss": 0.6744, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": -0.700167179107666, |
|
"rewards/margins": 0.07673807442188263, |
|
"rewards/rejected": -0.7769052386283875, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eta": 0.0010000000474974513, |
|
"grad_norm": 17.724132159919495, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"logits/chosen": -0.7538624405860901, |
|
"logits/rejected": -0.7211672067642212, |
|
"logps/chosen": -247.0625457763672, |
|
"logps/pi_response": -371.68939208984375, |
|
"logps/ref_response": -256.93328857421875, |
|
"logps/rejected": -252.12765502929688, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": -0.7719146609306335, |
|
"rewards/margins": 0.03903389722108841, |
|
"rewards/rejected": -0.8109486699104309, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6867941472774897, |
|
"train_runtime": 22649.1506, |
|
"train_samples_per_second": 0.883, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|