|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.997867803837953, |
|
"eval_steps": 500, |
|
"global_step": 234, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.024216890335083, |
|
"logits/rejected": -1.8819010257720947, |
|
"logps/chosen": -1305.559326171875, |
|
"logps/rejected": -3790.57275390625, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -2.2936673164367676, |
|
"logits/rejected": -2.1570396423339844, |
|
"logps/chosen": -1376.771728515625, |
|
"logps/rejected": -3041.7578125, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": -0.037909653037786484, |
|
"rewards/margins": 0.009223480708897114, |
|
"rewards/rejected": -0.04713314026594162, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -2.1768686771392822, |
|
"logits/rejected": -2.084563732147217, |
|
"logps/chosen": -2079.7646484375, |
|
"logps/rejected": -3319.23583984375, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.7143774628639221, |
|
"rewards/margins": 0.04834098741412163, |
|
"rewards/rejected": -0.7627183794975281, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.989935734988098e-06, |
|
"logits/chosen": -2.145012617111206, |
|
"logits/rejected": -2.027907609939575, |
|
"logps/chosen": -2782.030517578125, |
|
"logps/rejected": -4380.7275390625, |
|
"loss": 0.4871, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -1.3868815898895264, |
|
"rewards/margins": 0.28924745321273804, |
|
"rewards/rejected": -1.6761291027069092, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9287250957321685e-06, |
|
"logits/chosen": -2.2485415935516357, |
|
"logits/rejected": -2.1334125995635986, |
|
"logps/chosen": -2663.207763671875, |
|
"logps/rejected": -3975.64892578125, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.1883825063705444, |
|
"rewards/margins": 0.22184336185455322, |
|
"rewards/rejected": -1.4102258682250977, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.813260751184992e-06, |
|
"logits/chosen": -2.2985146045684814, |
|
"logits/rejected": -2.210111141204834, |
|
"logps/chosen": -1755.809326171875, |
|
"logps/rejected": -3258.091796875, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6678739190101624, |
|
"rewards/margins": 0.2041586935520172, |
|
"rewards/rejected": -0.872032642364502, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.646121984004666e-06, |
|
"logits/chosen": -2.2362232208251953, |
|
"logits/rejected": -2.1845810413360596, |
|
"logps/chosen": -2074.88134765625, |
|
"logps/rejected": -3796.244873046875, |
|
"loss": 0.473, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.5578988790512085, |
|
"rewards/margins": 0.24681946635246277, |
|
"rewards/rejected": -0.8047183752059937, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.431042398061499e-06, |
|
"logits/chosen": -2.382171392440796, |
|
"logits/rejected": -2.307375431060791, |
|
"logps/chosen": -1571.745361328125, |
|
"logps/rejected": -3834.587158203125, |
|
"loss": 0.4815, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.559153139591217, |
|
"rewards/margins": 0.5565303564071655, |
|
"rewards/rejected": -1.1156834363937378, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.172826515897146e-06, |
|
"logits/chosen": -2.408031940460205, |
|
"logits/rejected": -2.3543269634246826, |
|
"logps/chosen": -2339.724609375, |
|
"logps/rejected": -3922.291748046875, |
|
"loss": 0.4755, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.0054388046264648, |
|
"rewards/margins": 0.39337557554244995, |
|
"rewards/rejected": -1.39881432056427, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8772424536302565e-06, |
|
"logits/chosen": -2.4260120391845703, |
|
"logits/rejected": -2.373760461807251, |
|
"logps/chosen": -2360.41552734375, |
|
"logps/rejected": -3782.758544921875, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.8136354684829712, |
|
"rewards/margins": 0.4064968228340149, |
|
"rewards/rejected": -1.2201323509216309, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5508930707739143e-06, |
|
"logits/chosen": -2.4029459953308105, |
|
"logits/rejected": -2.3526365756988525, |
|
"logps/chosen": -2014.8980712890625, |
|
"logps/rejected": -3683.920654296875, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.5975068211555481, |
|
"rewards/margins": 0.31760281324386597, |
|
"rewards/rejected": -0.9151096343994141, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.201068473265007e-06, |
|
"logits/chosen": -2.452263355255127, |
|
"logits/rejected": -2.4388153553009033, |
|
"logps/chosen": -2102.702392578125, |
|
"logps/rejected": -4107.12255859375, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.7340173125267029, |
|
"rewards/margins": 0.48010140657424927, |
|
"rewards/rejected": -1.2141185998916626, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.835583164544139e-06, |
|
"logits/chosen": -2.4622802734375, |
|
"logits/rejected": -2.453519344329834, |
|
"logps/chosen": -2598.88037109375, |
|
"logps/rejected": -4125.0986328125, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9378460049629211, |
|
"rewards/margins": 0.3462037444114685, |
|
"rewards/rejected": -1.2840497493743896, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4626014824618418e-06, |
|
"logits/chosen": -2.5731091499328613, |
|
"logits/rejected": -2.559770107269287, |
|
"logps/chosen": -2807.160400390625, |
|
"logps/rejected": -4325.0205078125, |
|
"loss": 0.4802, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.1455607414245605, |
|
"rewards/margins": 0.6021825075149536, |
|
"rewards/rejected": -1.7477432489395142, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.090455221462156e-06, |
|
"logits/chosen": -2.468670606613159, |
|
"logits/rejected": -2.4780094623565674, |
|
"logps/chosen": -2137.4091796875, |
|
"logps/rejected": -4046.2734375, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8530462980270386, |
|
"rewards/margins": 0.3694917857646942, |
|
"rewards/rejected": -1.2225382328033447, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"logits/chosen": -2.4550023078918457, |
|
"logits/rejected": -2.4524893760681152, |
|
"logps/chosen": -1734.0328369140625, |
|
"logps/rejected": -3453.727783203125, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.5440791845321655, |
|
"rewards/margins": 0.4087960124015808, |
|
"rewards/rejected": -0.9528751373291016, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3817171292109182e-06, |
|
"logits/chosen": -2.520023822784424, |
|
"logits/rejected": -2.549712657928467, |
|
"logps/chosen": -2198.69189453125, |
|
"logps/rejected": -3628.389892578125, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.6990865468978882, |
|
"rewards/margins": 0.3725363612174988, |
|
"rewards/rejected": -1.0716229677200317, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.0609573357858166e-06, |
|
"logits/chosen": -2.5392794609069824, |
|
"logits/rejected": -2.5447866916656494, |
|
"logps/chosen": -1648.3802490234375, |
|
"logps/rejected": -4229.2060546875, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7170668840408325, |
|
"rewards/margins": 0.6884183883666992, |
|
"rewards/rejected": -1.4054853916168213, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.723433775328385e-07, |
|
"logits/chosen": -2.524719476699829, |
|
"logits/rejected": -2.5857484340667725, |
|
"logps/chosen": -2204.722900390625, |
|
"logps/rejected": -4786.68408203125, |
|
"loss": 0.47, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9566167593002319, |
|
"rewards/margins": 0.7360025644302368, |
|
"rewards/rejected": -1.6926193237304688, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.223224133591475e-07, |
|
"logits/chosen": -2.5650665760040283, |
|
"logits/rejected": -2.5723259449005127, |
|
"logps/chosen": -2591.955322265625, |
|
"logps/rejected": -4333.18701171875, |
|
"loss": 0.4722, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.0086050033569336, |
|
"rewards/margins": 0.689775824546814, |
|
"rewards/rejected": -1.6983808279037476, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.164794984571759e-07, |
|
"logits/chosen": -2.4675166606903076, |
|
"logits/rejected": -2.536062717437744, |
|
"logps/chosen": -2089.778564453125, |
|
"logps/rejected": -4025.212158203125, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.8992789387702942, |
|
"rewards/margins": 0.5448096990585327, |
|
"rewards/rejected": -1.4440886974334717, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.59412823400657e-07, |
|
"logits/chosen": -2.560384750366211, |
|
"logits/rejected": -2.6164653301239014, |
|
"logps/chosen": -2061.99658203125, |
|
"logps/rejected": -3734.251953125, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.9695846438407898, |
|
"rewards/margins": 0.20665684342384338, |
|
"rewards/rejected": -1.176241397857666, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.463099816548578e-08, |
|
"logits/chosen": -2.4755778312683105, |
|
"logits/rejected": -2.5548205375671387, |
|
"logps/chosen": -1978.981689453125, |
|
"logps/rejected": -4176.359375, |
|
"loss": 0.4721, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8206374049186707, |
|
"rewards/margins": 0.5447834730148315, |
|
"rewards/rejected": -1.365420937538147, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.474675580662113e-09, |
|
"logits/chosen": -2.485661745071411, |
|
"logits/rejected": -2.55751371383667, |
|
"logps/chosen": -2132.883056640625, |
|
"logps/rejected": -4433.28271484375, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.7833830714225769, |
|
"rewards/margins": 0.7396507263183594, |
|
"rewards/rejected": -1.523033857345581, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 234, |
|
"total_flos": 0.0, |
|
"train_loss": 0.47607828460187995, |
|
"train_runtime": 5505.1583, |
|
"train_samples_per_second": 2.725, |
|
"train_steps_per_second": 0.043 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 234, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|