{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.981366459627329,
  "eval_steps": 50,
  "global_step": 120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12422360248447205,
      "grad_norm": 65.02782550737439,
      "learning_rate": 5e-07,
      "logits/chosen": -2.7251429557800293,
      "logits/rejected": -2.70817494392395,
      "logps/chosen": -262.96563720703125,
      "logps/rejected": -182.58338928222656,
      "loss": 0.6897,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 0.01599644497036934,
      "rewards/margins": 0.006512208841741085,
      "rewards/rejected": 0.009484234265983105,
      "step": 5
    },
    {
      "epoch": 0.2484472049689441,
      "grad_norm": 48.3464252705335,
      "learning_rate": 1e-06,
      "logits/chosen": -2.6999757289886475,
      "logits/rejected": -2.6889986991882324,
      "logps/chosen": -268.0428771972656,
      "logps/rejected": -197.49484252929688,
      "loss": 0.6238,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.6579615473747253,
      "rewards/margins": 0.32934561371803284,
      "rewards/rejected": 0.3286159038543701,
      "step": 10
    },
    {
      "epoch": 0.37267080745341613,
      "grad_norm": 53.44161973291976,
      "learning_rate": 9.949107209404663e-07,
      "logits/chosen": -2.5064568519592285,
      "logits/rejected": -2.495807647705078,
      "logps/chosen": -242.85745239257812,
      "logps/rejected": -196.7808074951172,
      "loss": 0.616,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 1.777621865272522,
      "rewards/margins": 0.5862057209014893,
      "rewards/rejected": 1.1914160251617432,
      "step": 15
    },
    {
      "epoch": 0.4968944099378882,
      "grad_norm": 47.682650591660796,
      "learning_rate": 9.797464868072486e-07,
      "logits/chosen": -2.3988099098205566,
      "logits/rejected": -2.3708558082580566,
      "logps/chosen": -249.437255859375,
      "logps/rejected": -210.55868530273438,
      "loss": 0.5906,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 1.9457979202270508,
      "rewards/margins": 1.0106760263442993,
      "rewards/rejected": 0.9351221323013306,
      "step": 20
    },
    {
      "epoch": 0.6211180124223602,
      "grad_norm": 39.33765044259658,
      "learning_rate": 9.548159976772592e-07,
      "logits/chosen": -2.241548538208008,
      "logits/rejected": -2.1949617862701416,
      "logps/chosen": -244.6901397705078,
      "logps/rejected": -199.38278198242188,
      "loss": 0.594,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 1.7841804027557373,
      "rewards/margins": 1.113467812538147,
      "rewards/rejected": 0.6707127094268799,
      "step": 25
    },
    {
      "epoch": 0.7453416149068323,
      "grad_norm": 33.38735708969768,
      "learning_rate": 9.206267664155906e-07,
      "logits/chosen": -2.226879119873047,
      "logits/rejected": -2.1980159282684326,
      "logps/chosen": -261.0025329589844,
      "logps/rejected": -201.90567016601562,
      "loss": 0.5731,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": 1.9629528522491455,
      "rewards/margins": 1.5217477083206177,
      "rewards/rejected": 0.44120508432388306,
      "step": 30
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 40.7877020563695,
      "learning_rate": 8.778747871771291e-07,
      "logits/chosen": -2.303542375564575,
      "logits/rejected": -2.286126136779785,
      "logps/chosen": -282.151611328125,
      "logps/rejected": -185.19766235351562,
      "loss": 0.5745,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 2.344021797180176,
      "rewards/margins": 2.0376055240631104,
      "rewards/rejected": 0.30641618371009827,
      "step": 35
    },
    {
      "epoch": 0.9937888198757764,
      "grad_norm": 44.86549702944521,
      "learning_rate": 8.274303669726426e-07,
      "logits/chosen": -2.4969606399536133,
      "logits/rejected": -2.464599847793579,
      "logps/chosen": -244.0958709716797,
      "logps/rejected": -183.03262329101562,
      "loss": 0.5178,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 1.7189185619354248,
      "rewards/margins": 1.3635226488113403,
      "rewards/rejected": 0.3553960919380188,
      "step": 40
    },
    {
      "epoch": 1.1180124223602483,
      "grad_norm": 19.636732005851645,
      "learning_rate": 7.703204087277988e-07,
      "logits/chosen": -2.5191662311553955,
      "logits/rejected": -2.48410701751709,
      "logps/chosen": -228.06103515625,
      "logps/rejected": -196.45220947265625,
      "loss": 0.232,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 2.1026101112365723,
      "rewards/margins": 2.677381992340088,
      "rewards/rejected": -0.5747714042663574,
      "step": 45
    },
    {
      "epoch": 1.2422360248447206,
      "grad_norm": 17.158865977777797,
      "learning_rate": 7.077075065009433e-07,
      "logits/chosen": -2.4344613552093506,
      "logits/rejected": -2.4072365760803223,
      "logps/chosen": -227.47802734375,
      "logps/rejected": -203.6940155029297,
      "loss": 0.2648,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 2.5955851078033447,
      "rewards/margins": 3.1633975505828857,
      "rewards/rejected": -0.567812442779541,
      "step": 50
    },
    {
      "epoch": 1.2422360248447206,
      "eval_logits/chosen": -2.338369846343994,
      "eval_logits/rejected": -2.3208906650543213,
      "eval_logps/chosen": -237.45626831054688,
      "eval_logps/rejected": -245.1334686279297,
      "eval_loss": 0.5693262219429016,
      "eval_rewards/accuracies": 0.8125,
      "eval_rewards/chosen": 2.0296123027801514,
      "eval_rewards/margins": 2.5231986045837402,
      "eval_rewards/rejected": -0.4935866892337799,
      "eval_runtime": 77.4881,
      "eval_samples_per_second": 14.712,
      "eval_steps_per_second": 0.232,
      "step": 50
    },
    {
      "epoch": 1.3664596273291925,
      "grad_norm": 24.70055582817972,
      "learning_rate": 6.408662784207149e-07,
      "logits/chosen": -2.281872272491455,
      "logits/rejected": -2.279301881790161,
      "logps/chosen": -233.04708862304688,
      "logps/rejected": -215.3188934326172,
      "loss": 0.2485,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 2.539990186691284,
      "rewards/margins": 3.1891541481018066,
      "rewards/rejected": -0.6491641402244568,
      "step": 55
    },
    {
      "epoch": 1.4906832298136645,
      "grad_norm": 26.10474371944714,
      "learning_rate": 5.711574191366427e-07,
      "logits/chosen": -2.2054412364959717,
      "logits/rejected": -2.202352523803711,
      "logps/chosen": -234.28701782226562,
      "logps/rejected": -213.38143920898438,
      "loss": 0.2623,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 2.3911938667297363,
      "rewards/margins": 3.317472457885742,
      "rewards/rejected": -0.9262781143188477,
      "step": 60
    },
    {
      "epoch": 1.6149068322981366,
      "grad_norm": 22.51894757334605,
      "learning_rate": 5e-07,
      "logits/chosen": -2.232825756072998,
      "logits/rejected": -2.171570301055908,
      "logps/chosen": -241.08193969726562,
      "logps/rejected": -209.3240966796875,
      "loss": 0.2633,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 2.3284173011779785,
      "rewards/margins": 3.415198564529419,
      "rewards/rejected": -1.08678138256073,
      "step": 65
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 22.492973391255987,
      "learning_rate": 4.2884258086335745e-07,
      "logits/chosen": -2.169712543487549,
      "logits/rejected": -2.1445705890655518,
      "logps/chosen": -232.45443725585938,
      "logps/rejected": -222.10830688476562,
      "loss": 0.2837,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 2.3141350746154785,
      "rewards/margins": 3.6101813316345215,
      "rewards/rejected": -1.2960463762283325,
      "step": 70
    },
    {
      "epoch": 1.8633540372670807,
      "grad_norm": 21.249146991119204,
      "learning_rate": 3.591337215792851e-07,
      "logits/chosen": -2.073169469833374,
      "logits/rejected": -2.0878424644470215,
      "logps/chosen": -241.75015258789062,
      "logps/rejected": -206.7108154296875,
      "loss": 0.2855,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 2.4120543003082275,
      "rewards/margins": 3.6116981506347656,
      "rewards/rejected": -1.1996442079544067,
      "step": 75
    },
    {
      "epoch": 1.9875776397515528,
      "grad_norm": 23.84694325653629,
      "learning_rate": 2.922924934990568e-07,
      "logits/chosen": -2.0345282554626465,
      "logits/rejected": -2.0104496479034424,
      "logps/chosen": -230.2379913330078,
      "logps/rejected": -220.3455810546875,
      "loss": 0.2806,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 2.359086036682129,
      "rewards/margins": 3.5737903118133545,
      "rewards/rejected": -1.214704155921936,
      "step": 80
    },
    {
      "epoch": 2.111801242236025,
      "grad_norm": 17.085591816665293,
      "learning_rate": 2.2967959127220137e-07,
      "logits/chosen": -2.0804636478424072,
      "logits/rejected": -2.0491340160369873,
      "logps/chosen": -236.104736328125,
      "logps/rejected": -225.11471557617188,
      "loss": 0.1502,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 2.9891457557678223,
      "rewards/margins": 4.1706223487854,
      "rewards/rejected": -1.1814768314361572,
      "step": 85
    },
    {
      "epoch": 2.2360248447204967,
      "grad_norm": 18.158251153184455,
      "learning_rate": 1.725696330273575e-07,
      "logits/chosen": -2.092092514038086,
      "logits/rejected": -2.0654916763305664,
      "logps/chosen": -235.83578491210938,
      "logps/rejected": -234.26809692382812,
      "loss": 0.1426,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 2.776648759841919,
      "rewards/margins": 4.275698661804199,
      "rewards/rejected": -1.499050259590149,
      "step": 90
    },
    {
      "epoch": 2.360248447204969,
      "grad_norm": 15.808936697383274,
      "learning_rate": 1.2212521282287093e-07,
      "logits/chosen": -2.0797340869903564,
      "logits/rejected": -2.0396041870117188,
      "logps/chosen": -242.691162109375,
      "logps/rejected": -201.09994506835938,
      "loss": 0.1446,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 2.843183994293213,
      "rewards/margins": 3.8034491539001465,
      "rewards/rejected": -0.9602655172348022,
      "step": 95
    },
    {
      "epoch": 2.4844720496894412,
      "grad_norm": 18.76592353835065,
      "learning_rate": 7.937323358440934e-08,
      "logits/chosen": -2.139120578765869,
      "logits/rejected": -2.091543674468994,
      "logps/chosen": -231.04006958007812,
      "logps/rejected": -220.3035888671875,
      "loss": 0.1487,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": 2.8384525775909424,
      "rewards/margins": 4.084644317626953,
      "rewards/rejected": -1.246191382408142,
      "step": 100
    },
    {
      "epoch": 2.4844720496894412,
      "eval_logits/chosen": -2.1241261959075928,
      "eval_logits/rejected": -2.093987226486206,
      "eval_logps/chosen": -241.23301696777344,
      "eval_logps/rejected": -252.7169647216797,
      "eval_loss": 0.5524086356163025,
      "eval_rewards/accuracies": 0.8125,
      "eval_rewards/chosen": 1.651939868927002,
      "eval_rewards/margins": 2.903874397277832,
      "eval_rewards/rejected": -1.2519348859786987,
      "eval_runtime": 76.7688,
      "eval_samples_per_second": 14.85,
      "eval_steps_per_second": 0.234,
      "step": 100
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 20.331751556664074,
      "learning_rate": 4.518400232274078e-08,
      "logits/chosen": -2.1376147270202637,
      "logits/rejected": -2.092000961303711,
      "logps/chosen": -239.6346435546875,
      "logps/rejected": -220.95663452148438,
      "loss": 0.1515,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 2.8724117279052734,
      "rewards/margins": 4.305968761444092,
      "rewards/rejected": -1.4335569143295288,
      "step": 105
    },
    {
      "epoch": 2.732919254658385,
      "grad_norm": 16.10120397999774,
      "learning_rate": 2.025351319275137e-08,
      "logits/chosen": -2.1425278186798096,
      "logits/rejected": -2.1018919944763184,
      "logps/chosen": -245.0784149169922,
      "logps/rejected": -212.120361328125,
      "loss": 0.1297,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 2.7527151107788086,
      "rewards/margins": 4.1609883308410645,
      "rewards/rejected": -1.408272624015808,
      "step": 110
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 15.75267796946239,
      "learning_rate": 5.0892790595336575e-09,
      "logits/chosen": -2.1198809146881104,
      "logits/rejected": -2.1020970344543457,
      "logps/chosen": -221.77182006835938,
      "logps/rejected": -220.3450927734375,
      "loss": 0.1345,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": 2.698946952819824,
      "rewards/margins": 4.416926383972168,
      "rewards/rejected": -1.7179794311523438,
      "step": 115
    },
    {
      "epoch": 2.981366459627329,
      "grad_norm": 17.27205952924189,
      "learning_rate": 0.0,
      "logits/chosen": -2.1219396591186523,
      "logits/rejected": -2.1288697719573975,
      "logps/chosen": -226.1340789794922,
      "logps/rejected": -211.1949462890625,
      "loss": 0.1647,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 2.3404417037963867,
      "rewards/margins": 3.8494620323181152,
      "rewards/rejected": -1.5090203285217285,
      "step": 120
    },
    {
      "epoch": 2.981366459627329,
      "step": 120,
      "total_flos": 1414680891359232.0,
      "train_loss": 0.3361198857426643,
      "train_runtime": 4442.0256,
      "train_samples_per_second": 6.927,
      "train_steps_per_second": 0.027
    }
  ],
  "logging_steps": 5,
  "max_steps": 120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1414680891359232.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}