htlou's picture
Upload folder using huggingface_hub
a8c09e4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.981366459627329,
"eval_steps": 50,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12422360248447205,
"grad_norm": 65.02782550737439,
"learning_rate": 5e-07,
"logits/chosen": -2.7251429557800293,
"logits/rejected": -2.70817494392395,
"logps/chosen": -262.96563720703125,
"logps/rejected": -182.58338928222656,
"loss": 0.6897,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": 0.01599644497036934,
"rewards/margins": 0.006512208841741085,
"rewards/rejected": 0.009484234265983105,
"step": 5
},
{
"epoch": 0.2484472049689441,
"grad_norm": 48.3464252705335,
"learning_rate": 1e-06,
"logits/chosen": -2.6999757289886475,
"logits/rejected": -2.6889986991882324,
"logps/chosen": -268.0428771972656,
"logps/rejected": -197.49484252929688,
"loss": 0.6238,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.6579615473747253,
"rewards/margins": 0.32934561371803284,
"rewards/rejected": 0.3286159038543701,
"step": 10
},
{
"epoch": 0.37267080745341613,
"grad_norm": 53.44161973291976,
"learning_rate": 9.949107209404663e-07,
"logits/chosen": -2.5064568519592285,
"logits/rejected": -2.495807647705078,
"logps/chosen": -242.85745239257812,
"logps/rejected": -196.7808074951172,
"loss": 0.616,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 1.777621865272522,
"rewards/margins": 0.5862057209014893,
"rewards/rejected": 1.1914160251617432,
"step": 15
},
{
"epoch": 0.4968944099378882,
"grad_norm": 47.682650591660796,
"learning_rate": 9.797464868072486e-07,
"logits/chosen": -2.3988099098205566,
"logits/rejected": -2.3708558082580566,
"logps/chosen": -249.437255859375,
"logps/rejected": -210.55868530273438,
"loss": 0.5906,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 1.9457979202270508,
"rewards/margins": 1.0106760263442993,
"rewards/rejected": 0.9351221323013306,
"step": 20
},
{
"epoch": 0.6211180124223602,
"grad_norm": 39.33765044259658,
"learning_rate": 9.548159976772592e-07,
"logits/chosen": -2.241548538208008,
"logits/rejected": -2.1949617862701416,
"logps/chosen": -244.6901397705078,
"logps/rejected": -199.38278198242188,
"loss": 0.594,
"rewards/accuracies": 0.71875,
"rewards/chosen": 1.7841804027557373,
"rewards/margins": 1.113467812538147,
"rewards/rejected": 0.6707127094268799,
"step": 25
},
{
"epoch": 0.7453416149068323,
"grad_norm": 33.38735708969768,
"learning_rate": 9.206267664155906e-07,
"logits/chosen": -2.226879119873047,
"logits/rejected": -2.1980159282684326,
"logps/chosen": -261.0025329589844,
"logps/rejected": -201.90567016601562,
"loss": 0.5731,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": 1.9629528522491455,
"rewards/margins": 1.5217477083206177,
"rewards/rejected": 0.44120508432388306,
"step": 30
},
{
"epoch": 0.8695652173913043,
"grad_norm": 40.7877020563695,
"learning_rate": 8.778747871771291e-07,
"logits/chosen": -2.303542375564575,
"logits/rejected": -2.286126136779785,
"logps/chosen": -282.151611328125,
"logps/rejected": -185.19766235351562,
"loss": 0.5745,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 2.344021797180176,
"rewards/margins": 2.0376055240631104,
"rewards/rejected": 0.30641618371009827,
"step": 35
},
{
"epoch": 0.9937888198757764,
"grad_norm": 44.86549702944521,
"learning_rate": 8.274303669726426e-07,
"logits/chosen": -2.4969606399536133,
"logits/rejected": -2.464599847793579,
"logps/chosen": -244.0958709716797,
"logps/rejected": -183.03262329101562,
"loss": 0.5178,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.7189185619354248,
"rewards/margins": 1.3635226488113403,
"rewards/rejected": 0.3553960919380188,
"step": 40
},
{
"epoch": 1.1180124223602483,
"grad_norm": 19.636732005851645,
"learning_rate": 7.703204087277988e-07,
"logits/chosen": -2.5191662311553955,
"logits/rejected": -2.48410701751709,
"logps/chosen": -228.06103515625,
"logps/rejected": -196.45220947265625,
"loss": 0.232,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 2.1026101112365723,
"rewards/margins": 2.677381992340088,
"rewards/rejected": -0.5747714042663574,
"step": 45
},
{
"epoch": 1.2422360248447206,
"grad_norm": 17.158865977777797,
"learning_rate": 7.077075065009433e-07,
"logits/chosen": -2.4344613552093506,
"logits/rejected": -2.4072365760803223,
"logps/chosen": -227.47802734375,
"logps/rejected": -203.6940155029297,
"loss": 0.2648,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 2.5955851078033447,
"rewards/margins": 3.1633975505828857,
"rewards/rejected": -0.567812442779541,
"step": 50
},
{
"epoch": 1.2422360248447206,
"eval_logits/chosen": -2.338369846343994,
"eval_logits/rejected": -2.3208906650543213,
"eval_logps/chosen": -237.45626831054688,
"eval_logps/rejected": -245.1334686279297,
"eval_loss": 0.5693262219429016,
"eval_rewards/accuracies": 0.8125,
"eval_rewards/chosen": 2.0296123027801514,
"eval_rewards/margins": 2.5231986045837402,
"eval_rewards/rejected": -0.4935866892337799,
"eval_runtime": 77.4881,
"eval_samples_per_second": 14.712,
"eval_steps_per_second": 0.232,
"step": 50
},
{
"epoch": 1.3664596273291925,
"grad_norm": 24.70055582817972,
"learning_rate": 6.408662784207149e-07,
"logits/chosen": -2.281872272491455,
"logits/rejected": -2.279301881790161,
"logps/chosen": -233.04708862304688,
"logps/rejected": -215.3188934326172,
"loss": 0.2485,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 2.539990186691284,
"rewards/margins": 3.1891541481018066,
"rewards/rejected": -0.6491641402244568,
"step": 55
},
{
"epoch": 1.4906832298136645,
"grad_norm": 26.10474371944714,
"learning_rate": 5.711574191366427e-07,
"logits/chosen": -2.2054412364959717,
"logits/rejected": -2.202352523803711,
"logps/chosen": -234.28701782226562,
"logps/rejected": -213.38143920898438,
"loss": 0.2623,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 2.3911938667297363,
"rewards/margins": 3.317472457885742,
"rewards/rejected": -0.9262781143188477,
"step": 60
},
{
"epoch": 1.6149068322981366,
"grad_norm": 22.51894757334605,
"learning_rate": 5e-07,
"logits/chosen": -2.232825756072998,
"logits/rejected": -2.171570301055908,
"logps/chosen": -241.08193969726562,
"logps/rejected": -209.3240966796875,
"loss": 0.2633,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 2.3284173011779785,
"rewards/margins": 3.415198564529419,
"rewards/rejected": -1.08678138256073,
"step": 65
},
{
"epoch": 1.7391304347826086,
"grad_norm": 22.492973391255987,
"learning_rate": 4.2884258086335745e-07,
"logits/chosen": -2.169712543487549,
"logits/rejected": -2.1445705890655518,
"logps/chosen": -232.45443725585938,
"logps/rejected": -222.10830688476562,
"loss": 0.2837,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 2.3141350746154785,
"rewards/margins": 3.6101813316345215,
"rewards/rejected": -1.2960463762283325,
"step": 70
},
{
"epoch": 1.8633540372670807,
"grad_norm": 21.249146991119204,
"learning_rate": 3.591337215792851e-07,
"logits/chosen": -2.073169469833374,
"logits/rejected": -2.0878424644470215,
"logps/chosen": -241.75015258789062,
"logps/rejected": -206.7108154296875,
"loss": 0.2855,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 2.4120543003082275,
"rewards/margins": 3.6116981506347656,
"rewards/rejected": -1.1996442079544067,
"step": 75
},
{
"epoch": 1.9875776397515528,
"grad_norm": 23.84694325653629,
"learning_rate": 2.922924934990568e-07,
"logits/chosen": -2.0345282554626465,
"logits/rejected": -2.0104496479034424,
"logps/chosen": -230.2379913330078,
"logps/rejected": -220.3455810546875,
"loss": 0.2806,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 2.359086036682129,
"rewards/margins": 3.5737903118133545,
"rewards/rejected": -1.214704155921936,
"step": 80
},
{
"epoch": 2.111801242236025,
"grad_norm": 17.085591816665293,
"learning_rate": 2.2967959127220137e-07,
"logits/chosen": -2.0804636478424072,
"logits/rejected": -2.0491340160369873,
"logps/chosen": -236.104736328125,
"logps/rejected": -225.11471557617188,
"loss": 0.1502,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 2.9891457557678223,
"rewards/margins": 4.1706223487854,
"rewards/rejected": -1.1814768314361572,
"step": 85
},
{
"epoch": 2.2360248447204967,
"grad_norm": 18.158251153184455,
"learning_rate": 1.725696330273575e-07,
"logits/chosen": -2.092092514038086,
"logits/rejected": -2.0654916763305664,
"logps/chosen": -235.83578491210938,
"logps/rejected": -234.26809692382812,
"loss": 0.1426,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 2.776648759841919,
"rewards/margins": 4.275698661804199,
"rewards/rejected": -1.499050259590149,
"step": 90
},
{
"epoch": 2.360248447204969,
"grad_norm": 15.808936697383274,
"learning_rate": 1.2212521282287093e-07,
"logits/chosen": -2.0797340869903564,
"logits/rejected": -2.0396041870117188,
"logps/chosen": -242.691162109375,
"logps/rejected": -201.09994506835938,
"loss": 0.1446,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 2.843183994293213,
"rewards/margins": 3.8034491539001465,
"rewards/rejected": -0.9602655172348022,
"step": 95
},
{
"epoch": 2.4844720496894412,
"grad_norm": 18.76592353835065,
"learning_rate": 7.937323358440934e-08,
"logits/chosen": -2.139120578765869,
"logits/rejected": -2.091543674468994,
"logps/chosen": -231.04006958007812,
"logps/rejected": -220.3035888671875,
"loss": 0.1487,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 2.8384525775909424,
"rewards/margins": 4.084644317626953,
"rewards/rejected": -1.246191382408142,
"step": 100
},
{
"epoch": 2.4844720496894412,
"eval_logits/chosen": -2.1241261959075928,
"eval_logits/rejected": -2.093987226486206,
"eval_logps/chosen": -241.23301696777344,
"eval_logps/rejected": -252.7169647216797,
"eval_loss": 0.5524086356163025,
"eval_rewards/accuracies": 0.8125,
"eval_rewards/chosen": 1.651939868927002,
"eval_rewards/margins": 2.903874397277832,
"eval_rewards/rejected": -1.2519348859786987,
"eval_runtime": 76.7688,
"eval_samples_per_second": 14.85,
"eval_steps_per_second": 0.234,
"step": 100
},
{
"epoch": 2.608695652173913,
"grad_norm": 20.331751556664074,
"learning_rate": 4.518400232274078e-08,
"logits/chosen": -2.1376147270202637,
"logits/rejected": -2.092000961303711,
"logps/chosen": -239.6346435546875,
"logps/rejected": -220.95663452148438,
"loss": 0.1515,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 2.8724117279052734,
"rewards/margins": 4.305968761444092,
"rewards/rejected": -1.4335569143295288,
"step": 105
},
{
"epoch": 2.732919254658385,
"grad_norm": 16.10120397999774,
"learning_rate": 2.025351319275137e-08,
"logits/chosen": -2.1425278186798096,
"logits/rejected": -2.1018919944763184,
"logps/chosen": -245.0784149169922,
"logps/rejected": -212.120361328125,
"loss": 0.1297,
"rewards/accuracies": 0.9375,
"rewards/chosen": 2.7527151107788086,
"rewards/margins": 4.1609883308410645,
"rewards/rejected": -1.408272624015808,
"step": 110
},
{
"epoch": 2.857142857142857,
"grad_norm": 15.75267796946239,
"learning_rate": 5.0892790595336575e-09,
"logits/chosen": -2.1198809146881104,
"logits/rejected": -2.1020970344543457,
"logps/chosen": -221.77182006835938,
"logps/rejected": -220.3450927734375,
"loss": 0.1345,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 2.698946952819824,
"rewards/margins": 4.416926383972168,
"rewards/rejected": -1.7179794311523438,
"step": 115
},
{
"epoch": 2.981366459627329,
"grad_norm": 17.27205952924189,
"learning_rate": 0.0,
"logits/chosen": -2.1219396591186523,
"logits/rejected": -2.1288697719573975,
"logps/chosen": -226.1340789794922,
"logps/rejected": -211.1949462890625,
"loss": 0.1647,
"rewards/accuracies": 0.9375,
"rewards/chosen": 2.3404417037963867,
"rewards/margins": 3.8494620323181152,
"rewards/rejected": -1.5090203285217285,
"step": 120
},
{
"epoch": 2.981366459627329,
"step": 120,
"total_flos": 1414680891359232.0,
"train_loss": 0.3361198857426643,
"train_runtime": 4442.0256,
"train_samples_per_second": 6.927,
"train_steps_per_second": 0.027
}
],
"logging_steps": 5,
"max_steps": 120,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1414680891359232.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}