{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 287,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003484320557491289,
      "grad_norm": 260.1177061514537,
      "learning_rate": 1.3793103448275862e-08,
      "logits/chosen": -2.5345611572265625,
      "logits/rejected": -2.581700563430786,
      "logps/chosen": -60.002105712890625,
      "logps/rejected": -99.98374938964844,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03484320557491289,
      "grad_norm": 238.95036737182184,
      "learning_rate": 1.3793103448275863e-07,
      "logits/chosen": -2.563187837600708,
      "logits/rejected": -2.5619239807128906,
      "logps/chosen": -59.64890670776367,
      "logps/rejected": -73.38821411132812,
      "loss": 0.6926,
      "rewards/accuracies": 0.2430555522441864,
      "rewards/chosen": 0.004840263165533543,
      "rewards/margins": 0.008125737309455872,
      "rewards/rejected": -0.003285474143922329,
      "step": 10
    },
    {
      "epoch": 0.06968641114982578,
      "grad_norm": 287.8433464848162,
      "learning_rate": 2.7586206896551726e-07,
      "logits/chosen": -2.606386184692383,
      "logits/rejected": -2.56528902053833,
      "logps/chosen": -103.9992446899414,
      "logps/rejected": -94.85326385498047,
      "loss": 0.6826,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 0.05751848220825195,
      "rewards/margins": 0.0391354039311409,
      "rewards/rejected": 0.018383082002401352,
      "step": 20
    },
    {
      "epoch": 0.10452961672473868,
      "grad_norm": 303.36666472427896,
      "learning_rate": 3.999851729426529e-07,
      "logits/chosen": -2.5944175720214844,
      "logits/rejected": -2.5745928287506104,
      "logps/chosen": -82.03970336914062,
      "logps/rejected": -91.46243286132812,
      "loss": 0.6629,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 0.20558810234069824,
      "rewards/margins": 0.1706041693687439,
      "rewards/rejected": 0.03498392552137375,
      "step": 30
    },
    {
      "epoch": 0.13937282229965156,
      "grad_norm": 243.63595613939415,
      "learning_rate": 3.982085846069669e-07,
      "logits/chosen": -2.4932851791381836,
      "logits/rejected": -2.4914209842681885,
      "logps/chosen": -77.8083267211914,
      "logps/rejected": -73.65955352783203,
      "loss": 0.645,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": 0.04954088479280472,
      "rewards/margins": 0.3457469344139099,
      "rewards/rejected": -0.2962060570716858,
      "step": 40
    },
    {
      "epoch": 0.17421602787456447,
      "grad_norm": 209.2280603066799,
      "learning_rate": 3.93496739411485e-07,
      "logits/chosen": -2.519728422164917,
      "logits/rejected": -2.5236926078796387,
      "logps/chosen": -63.118324279785156,
      "logps/rejected": -75.77721405029297,
      "loss": 0.6708,
      "rewards/accuracies": 0.26875001192092896,
      "rewards/chosen": 0.3693724274635315,
      "rewards/margins": 0.22256764769554138,
      "rewards/rejected": 0.14680473506450653,
      "step": 50
    },
    {
      "epoch": 0.20905923344947736,
      "grad_norm": 207.31692334790202,
      "learning_rate": 3.859194147372412e-07,
      "logits/chosen": -2.4840266704559326,
      "logits/rejected": -2.477837324142456,
      "logps/chosen": -71.02902221679688,
      "logps/rejected": -66.88030242919922,
      "loss": 0.6573,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": 0.9798685908317566,
      "rewards/margins": 0.2921520173549652,
      "rewards/rejected": 0.687716543674469,
      "step": 60
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 237.196777331744,
      "learning_rate": 3.7558882264746717e-07,
      "logits/chosen": -2.5007472038269043,
      "logits/rejected": -2.4955484867095947,
      "logps/chosen": -61.67304611206055,
      "logps/rejected": -66.74317169189453,
      "loss": 0.6486,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 1.153420090675354,
      "rewards/margins": 0.39587074518203735,
      "rewards/rejected": 0.7575494050979614,
      "step": 70
    },
    {
      "epoch": 0.2787456445993031,
      "grad_norm": 258.03955447258926,
      "learning_rate": 3.6265794814711307e-07,
      "logits/chosen": -2.449399709701538,
      "logits/rejected": -2.439444065093994,
      "logps/chosen": -73.67487335205078,
      "logps/rejected": -76.30928039550781,
      "loss": 0.671,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": 0.8954970240592957,
      "rewards/margins": 0.3529408872127533,
      "rewards/rejected": 0.5425562262535095,
      "step": 80
    },
    {
      "epoch": 0.313588850174216,
      "grad_norm": 242.91718492869416,
      "learning_rate": 3.4731828363876876e-07,
      "logits/chosen": -2.4762511253356934,
      "logits/rejected": -2.4902822971343994,
      "logps/chosen": -64.53350830078125,
      "logps/rejected": -69.27278900146484,
      "loss": 0.6857,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": 0.6519443988800049,
      "rewards/margins": 0.27337905764579773,
      "rewards/rejected": 0.37856537103652954,
      "step": 90
    },
    {
      "epoch": 0.34843205574912894,
      "grad_norm": 261.21801117417044,
      "learning_rate": 3.297969931252667e-07,
      "logits/chosen": -2.466578483581543,
      "logits/rejected": -2.466921329498291,
      "logps/chosen": -73.65306091308594,
      "logps/rejected": -80.45304870605469,
      "loss": 0.6677,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": 0.7281513214111328,
      "rewards/margins": 0.4587801396846771,
      "rewards/rejected": 0.2693712115287781,
      "step": 100
    },
    {
      "epoch": 0.34843205574912894,
      "eval_logits/chosen": -2.5556371212005615,
      "eval_logits/rejected": -2.539428472518921,
      "eval_logps/chosen": -72.68360900878906,
      "eval_logps/rejected": -80.29220581054688,
      "eval_loss": 0.6482642889022827,
      "eval_rewards/accuracies": 0.3511904776096344,
      "eval_rewards/chosen": 0.8134303092956543,
      "eval_rewards/margins": 0.43716490268707275,
      "eval_rewards/rejected": 0.376265287399292,
      "eval_runtime": 116.9271,
      "eval_samples_per_second": 17.105,
      "eval_steps_per_second": 0.539,
      "step": 100
    },
    {
      "epoch": 0.3832752613240418,
      "grad_norm": 287.07453268461984,
      "learning_rate": 3.103535481540892e-07,
      "logits/chosen": -2.494990587234497,
      "logits/rejected": -2.457904100418091,
      "logps/chosen": -71.97386169433594,
      "logps/rejected": -62.46455764770508,
      "loss": 0.6685,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": 0.5382742881774902,
      "rewards/margins": 0.29256415367126465,
      "rewards/rejected": 0.24571006000041962,
      "step": 110
    },
    {
      "epoch": 0.4181184668989547,
      "grad_norm": 197.14505664650054,
      "learning_rate": 2.8927588532163986e-07,
      "logits/chosen": -2.5188653469085693,
      "logits/rejected": -2.486799955368042,
      "logps/chosen": -76.14109802246094,
      "logps/rejected": -66.33953857421875,
      "loss": 0.6406,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": 0.5942131876945496,
      "rewards/margins": 0.44479331374168396,
      "rewards/rejected": 0.14941997826099396,
      "step": 120
    },
    {
      "epoch": 0.4529616724738676,
      "grad_norm": 309.7749734748652,
      "learning_rate": 2.6687614224062655e-07,
      "logits/chosen": -2.5562925338745117,
      "logits/rejected": -2.537079095840454,
      "logps/chosen": -82.76345825195312,
      "logps/rejected": -87.87675476074219,
      "loss": 0.661,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": 0.5027960538864136,
      "rewards/margins": 0.6685007810592651,
      "rewards/rejected": -0.16570481657981873,
      "step": 130
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 207.92836576726873,
      "learning_rate": 2.434860351163114e-07,
      "logits/chosen": -2.4571573734283447,
      "logits/rejected": -2.445655345916748,
      "logps/chosen": -79.63760375976562,
      "logps/rejected": -70.80236053466797,
      "loss": 0.6301,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": 0.678521454334259,
      "rewards/margins": 0.6956696510314941,
      "rewards/rejected": -0.017148202285170555,
      "step": 140
    },
    {
      "epoch": 0.5226480836236934,
      "grad_norm": 266.02287575831133,
      "learning_rate": 2.194519463847738e-07,
      "logits/chosen": -2.522045612335205,
      "logits/rejected": -2.4781124591827393,
      "logps/chosen": -78.24339294433594,
      "logps/rejected": -79.53880310058594,
      "loss": 0.6665,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": 0.5584267377853394,
      "rewards/margins": 0.5530349016189575,
      "rewards/rejected": 0.005391845945268869,
      "step": 150
    },
    {
      "epoch": 0.5574912891986062,
      "grad_norm": 250.7616411510584,
      "learning_rate": 1.951297951600021e-07,
      "logits/chosen": -2.4823291301727295,
      "logits/rejected": -2.5029988288879395,
      "logps/chosen": -63.5211067199707,
      "logps/rejected": -71.80608367919922,
      "loss": 0.6696,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": 0.45168429613113403,
      "rewards/margins": 0.3766103982925415,
      "rewards/rejected": 0.07507390528917313,
      "step": 160
    },
    {
      "epoch": 0.5923344947735192,
      "grad_norm": 247.2037562642855,
      "learning_rate": 1.7087976645299907e-07,
      "logits/chosen": -2.485832691192627,
      "logits/rejected": -2.471374034881592,
      "logps/chosen": -68.3868637084961,
      "logps/rejected": -76.93021392822266,
      "loss": 0.6169,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": 0.2612365484237671,
      "rewards/margins": 0.5385117530822754,
      "rewards/rejected": -0.2772751450538635,
      "step": 170
    },
    {
      "epoch": 0.627177700348432,
      "grad_norm": 253.43980546820382,
      "learning_rate": 1.4706097721752126e-07,
      "logits/chosen": -2.5348095893859863,
      "logits/rejected": -2.523033618927002,
      "logps/chosen": -91.01544189453125,
      "logps/rejected": -86.55479431152344,
      "loss": 0.6875,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": 0.37877795100212097,
      "rewards/margins": 0.398730605840683,
      "rewards/rejected": -0.019952651113271713,
      "step": 180
    },
    {
      "epoch": 0.662020905923345,
      "grad_norm": 199.96875567633583,
      "learning_rate": 1.240261582126029e-07,
      "logits/chosen": -2.5323076248168945,
      "logits/rejected": -2.5228934288024902,
      "logps/chosen": -70.24234771728516,
      "logps/rejected": -81.30900573730469,
      "loss": 0.641,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": 0.36411014199256897,
      "rewards/margins": 0.26548105478286743,
      "rewards/rejected": 0.09862907975912094,
      "step": 190
    },
    {
      "epoch": 0.6968641114982579,
      "grad_norm": 331.81322846783684,
      "learning_rate": 1.0211643043778293e-07,
      "logits/chosen": -2.5657148361206055,
      "logits/rejected": -2.5730998516082764,
      "logps/chosen": -88.71566009521484,
      "logps/rejected": -91.1668930053711,
      "loss": 0.6658,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 0.8733166456222534,
      "rewards/margins": 0.4756090044975281,
      "rewards/rejected": 0.3977075219154358,
      "step": 200
    },
    {
      "epoch": 0.6968641114982579,
      "eval_logits/chosen": -2.541111707687378,
      "eval_logits/rejected": -2.5245211124420166,
      "eval_logps/chosen": -72.81554412841797,
      "eval_logps/rejected": -80.59257507324219,
      "eval_loss": 0.6494360566139221,
      "eval_rewards/accuracies": 0.3551587164402008,
      "eval_rewards/chosen": 0.7540590763092041,
      "eval_rewards/margins": 0.5129595994949341,
      "eval_rewards/rejected": 0.24109944701194763,
      "eval_runtime": 113.9336,
      "eval_samples_per_second": 17.554,
      "eval_steps_per_second": 0.553,
      "step": 200
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 387.8686320796793,
      "learning_rate": 8.165625349643729e-08,
      "logits/chosen": -2.5471763610839844,
      "logits/rejected": -2.5208678245544434,
      "logps/chosen": -67.77056884765625,
      "logps/rejected": -63.1707649230957,
      "loss": 0.6465,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": 0.6968271136283875,
      "rewards/margins": 0.5916789770126343,
      "rewards/rejected": 0.10514805465936661,
      "step": 210
    },
    {
      "epoch": 0.7665505226480837,
      "grad_norm": 195.53605803419836,
      "learning_rate": 6.294862069654417e-08,
      "logits/chosen": -2.573523759841919,
      "logits/rejected": -2.5524840354919434,
      "logps/chosen": -71.91072082519531,
      "logps/rejected": -70.82460021972656,
      "loss": 0.6589,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": 0.6963558197021484,
      "rewards/margins": 0.3189951777458191,
      "rewards/rejected": 0.37736067175865173,
      "step": 220
    },
    {
      "epoch": 0.8013937282229965,
      "grad_norm": 279.18152746545553,
      "learning_rate": 4.6270572044293563e-08,
      "logits/chosen": -2.5709030628204346,
      "logits/rejected": -2.5481770038604736,
      "logps/chosen": -88.14967346191406,
      "logps/rejected": -88.01075744628906,
      "loss": 0.6489,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": 0.8329347372055054,
      "rewards/margins": 0.7707937359809875,
      "rewards/rejected": 0.06214110180735588,
      "step": 230
    },
    {
      "epoch": 0.8362369337979094,
      "grad_norm": 218.8441311845723,
      "learning_rate": 3.186909157830124e-08,
      "logits/chosen": -2.563115119934082,
      "logits/rejected": -2.5264599323272705,
      "logps/chosen": -85.11534881591797,
      "logps/rejected": -79.63955688476562,
      "loss": 0.6431,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 0.7318143248558044,
      "rewards/margins": 0.5083704590797424,
      "rewards/rejected": 0.223443865776062,
      "step": 240
    },
    {
      "epoch": 0.8710801393728222,
      "grad_norm": 242.6767041893939,
      "learning_rate": 1.9957449800512527e-08,
      "logits/chosen": -2.5801711082458496,
      "logits/rejected": -2.544325113296509,
      "logps/chosen": -93.85467529296875,
      "logps/rejected": -89.73026275634766,
      "loss": 0.5901,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": 0.7175648212432861,
      "rewards/margins": 0.6203989386558533,
      "rewards/rejected": 0.09716588258743286,
      "step": 250
    },
    {
      "epoch": 0.9059233449477352,
      "grad_norm": 197.6846790605052,
      "learning_rate": 1.0712045368478117e-08,
      "logits/chosen": -2.4915573596954346,
      "logits/rejected": -2.50749135017395,
      "logps/chosen": -57.76538848876953,
      "logps/rejected": -65.16616821289062,
      "loss": 0.6473,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": 0.5777884721755981,
      "rewards/margins": 0.3698353171348572,
      "rewards/rejected": 0.20795314013957977,
      "step": 260
    },
    {
      "epoch": 0.9407665505226481,
      "grad_norm": 269.9944966251555,
      "learning_rate": 4.269792820155782e-09,
      "logits/chosen": -2.588334560394287,
      "logits/rejected": -2.5887222290039062,
      "logps/chosen": -67.41829681396484,
      "logps/rejected": -82.52043151855469,
      "loss": 0.6161,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 0.6578438878059387,
      "rewards/margins": 0.6527446508407593,
      "rewards/rejected": 0.005099198315292597,
      "step": 270
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 209.58164118406862,
      "learning_rate": 7.260950162363721e-10,
      "logits/chosen": -2.499262809753418,
      "logits/rejected": -2.475175380706787,
      "logps/chosen": -66.43128967285156,
      "logps/rejected": -70.41940307617188,
      "loss": 0.6212,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": 0.6313899159431458,
      "rewards/margins": 0.44463640451431274,
      "rewards/rejected": 0.18675348162651062,
      "step": 280
    },
    {
      "epoch": 1.0,
      "step": 287,
      "total_flos": 0.0,
      "train_loss": 0.6522094729885407,
      "train_runtime": 3372.3295,
      "train_samples_per_second": 5.438,
      "train_steps_per_second": 0.085
    }
  ],
  "logging_steps": 10,
  "max_steps": 287,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}