{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9158878504672896,
"eval_steps": 50,
"global_step": 78,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18691588785046728,
"grad_norm": 61.25268073995668,
"learning_rate": 5e-07,
"logits/chosen": -2.7241337299346924,
"logits/rejected": -2.6918282508850098,
"logps/chosen": -303.90643310546875,
"logps/rejected": -234.9805450439453,
"loss": 0.6903,
"rewards/accuracies": 0.29374998807907104,
"rewards/chosen": 0.0160987488925457,
"rewards/margins": 0.004555105231702328,
"rewards/rejected": 0.011543644592165947,
"step": 5
},
{
"epoch": 0.37383177570093457,
"grad_norm": 47.60898650679123,
"learning_rate": 1e-06,
"logits/chosen": -2.6647069454193115,
"logits/rejected": -2.650399923324585,
"logps/chosen": -269.51849365234375,
"logps/rejected": -198.7647705078125,
"loss": 0.6278,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": 0.5727251768112183,
"rewards/margins": 0.25209158658981323,
"rewards/rejected": 0.3206337094306946,
"step": 10
},
{
"epoch": 0.5607476635514018,
"grad_norm": 60.527379497145056,
"learning_rate": 9.867190271803463e-07,
"logits/chosen": -2.4945449829101562,
"logits/rejected": -2.4840779304504395,
"logps/chosen": -235.19595336914062,
"logps/rejected": -213.60946655273438,
"loss": 0.5799,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": 1.5359665155410767,
"rewards/margins": 0.8706293106079102,
"rewards/rejected": 0.6653371453285217,
"step": 15
},
{
"epoch": 0.7476635514018691,
"grad_norm": 53.828799786263204,
"learning_rate": 9.475816456775312e-07,
"logits/chosen": -2.4158647060394287,
"logits/rejected": -2.3855679035186768,
"logps/chosen": -268.0628662109375,
"logps/rejected": -225.652587890625,
"loss": 0.5684,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.9082000255584717,
"rewards/margins": 1.4608235359191895,
"rewards/rejected": 0.4473763406276703,
"step": 20
},
{
"epoch": 0.9345794392523364,
"grad_norm": 34.458805166311905,
"learning_rate": 8.846669854914395e-07,
"logits/chosen": -2.2879459857940674,
"logits/rejected": -2.2494328022003174,
"logps/chosen": -231.61703491210938,
"logps/rejected": -187.62875366210938,
"loss": 0.5395,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.543787956237793,
"rewards/margins": 1.2806603908538818,
"rewards/rejected": 0.2631274163722992,
"step": 25
},
{
"epoch": 1.1214953271028036,
"grad_norm": 27.103628504062815,
"learning_rate": 8.013173181896282e-07,
"logits/chosen": -2.277782917022705,
"logits/rejected": -2.277600049972534,
"logps/chosen": -259.5715637207031,
"logps/rejected": -206.3133087158203,
"loss": 0.3477,
"rewards/accuracies": 0.84375,
"rewards/chosen": 2.0770156383514404,
"rewards/margins": 2.2314352989196777,
"rewards/rejected": -0.15441982448101044,
"step": 30
},
{
"epoch": 1.308411214953271,
"grad_norm": 21.639384944433875,
"learning_rate": 7.019605024359474e-07,
"logits/chosen": -2.276589870452881,
"logits/rejected": -2.2846901416778564,
"logps/chosen": -261.1808166503906,
"logps/rejected": -219.38858032226562,
"loss": 0.2558,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 2.4890947341918945,
"rewards/margins": 3.4098620414733887,
"rewards/rejected": -0.9207670092582703,
"step": 35
},
{
"epoch": 1.4953271028037383,
"grad_norm": 22.680670396225757,
"learning_rate": 5.918747589082852e-07,
"logits/chosen": -2.3969621658325195,
"logits/rejected": -2.3342068195343018,
"logps/chosen": -248.27743530273438,
"logps/rejected": -212.25845336914062,
"loss": 0.2736,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.3888657093048096,
"rewards/margins": 3.1784424781799316,
"rewards/rejected": -0.789576530456543,
"step": 40
},
{
"epoch": 1.6822429906542056,
"grad_norm": 24.61733178022545,
"learning_rate": 4.769082706771303e-07,
"logits/chosen": -2.397773265838623,
"logits/rejected": -2.4283571243286133,
"logps/chosen": -246.1250762939453,
"logps/rejected": -229.49514770507812,
"loss": 0.2653,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 2.4959254264831543,
"rewards/margins": 3.4453117847442627,
"rewards/rejected": -0.9493860006332397,
"step": 45
},
{
"epoch": 1.8691588785046729,
"grad_norm": 21.42871601886689,
"learning_rate": 3.6316850496395855e-07,
"logits/chosen": -2.4491772651672363,
"logits/rejected": -2.4245429039001465,
"logps/chosen": -275.09423828125,
"logps/rejected": -236.69076538085938,
"loss": 0.2912,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 2.785498857498169,
"rewards/margins": 3.59558367729187,
"rewards/rejected": -0.8100848197937012,
"step": 50
},
{
"epoch": 1.8691588785046729,
"eval_logits/chosen": -2.4286158084869385,
"eval_logits/rejected": -2.4138076305389404,
"eval_logps/chosen": -230.1865997314453,
"eval_logps/rejected": -191.80255126953125,
"eval_loss": 0.5428566336631775,
"eval_rewards/accuracies": 0.78125,
"eval_rewards/chosen": 1.8078041076660156,
"eval_rewards/margins": 1.948243260383606,
"eval_rewards/rejected": -0.14043934643268585,
"eval_runtime": 50.1691,
"eval_samples_per_second": 15.149,
"eval_steps_per_second": 0.239,
"step": 50
},
{
"epoch": 2.05607476635514,
"grad_norm": 19.534577835943573,
"learning_rate": 2.566977607165719e-07,
"logits/chosen": -2.450115442276001,
"logits/rejected": -2.419285297393799,
"logps/chosen": -247.16586303710938,
"logps/rejected": -211.9401092529297,
"loss": 0.2287,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 2.7968287467956543,
"rewards/margins": 3.6083950996398926,
"rewards/rejected": -0.8115667104721069,
"step": 55
},
{
"epoch": 2.2429906542056073,
"grad_norm": 15.96174358986932,
"learning_rate": 1.631521781767214e-07,
"logits/chosen": -2.4297375679016113,
"logits/rejected": -2.407179355621338,
"logps/chosen": -232.9438018798828,
"logps/rejected": -221.9869384765625,
"loss": 0.1645,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.696375846862793,
"rewards/margins": 3.9043846130371094,
"rewards/rejected": -1.2080087661743164,
"step": 60
},
{
"epoch": 2.4299065420560746,
"grad_norm": 16.25190830636716,
"learning_rate": 8.75012627008489e-08,
"logits/chosen": -2.460448741912842,
"logits/rejected": -2.425128221511841,
"logps/chosen": -258.5059814453125,
"logps/rejected": -223.2895050048828,
"loss": 0.1427,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 3.1485841274261475,
"rewards/margins": 4.165754795074463,
"rewards/rejected": -1.017170786857605,
"step": 65
},
{
"epoch": 2.616822429906542,
"grad_norm": 15.19014195316505,
"learning_rate": 3.376388529782215e-08,
"logits/chosen": -2.4554247856140137,
"logits/rejected": -2.421217679977417,
"logps/chosen": -240.77627563476562,
"logps/rejected": -223.79129028320312,
"loss": 0.1737,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.8354218006134033,
"rewards/margins": 4.4457106590271,
"rewards/rejected": -1.6102889776229858,
"step": 70
},
{
"epoch": 2.803738317757009,
"grad_norm": 20.62451875265718,
"learning_rate": 4.794784562397458e-09,
"logits/chosen": -2.437767505645752,
"logits/rejected": -2.4236741065979004,
"logps/chosen": -251.58251953125,
"logps/rejected": -232.46157836914062,
"loss": 0.159,
"rewards/accuracies": 0.9375,
"rewards/chosen": 3.0037763118743896,
"rewards/margins": 4.042423248291016,
"rewards/rejected": -1.038646936416626,
"step": 75
},
{
"epoch": 2.9158878504672896,
"step": 78,
"total_flos": 919378820333568.0,
"train_loss": 0.3483479917049408,
"train_runtime": 3236.9548,
"train_samples_per_second": 6.337,
"train_steps_per_second": 0.024
}
],
"logging_steps": 5,
"max_steps": 78,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 919378820333568.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}