{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9158878504672896,
  "eval_steps": 50,
  "global_step": 78,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 61.25268073995668,
      "learning_rate": 5e-07,
      "logits/chosen": -2.7241337299346924,
      "logits/rejected": -2.6918282508850098,
      "logps/chosen": -303.90643310546875,
      "logps/rejected": -234.9805450439453,
      "loss": 0.6903,
      "rewards/accuracies": 0.29374998807907104,
      "rewards/chosen": 0.0160987488925457,
      "rewards/margins": 0.004555105231702328,
      "rewards/rejected": 0.011543644592165947,
      "step": 5
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 47.60898650679123,
      "learning_rate": 1e-06,
      "logits/chosen": -2.6647069454193115,
      "logits/rejected": -2.650399923324585,
      "logps/chosen": -269.51849365234375,
      "logps/rejected": -198.7647705078125,
      "loss": 0.6278,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.5727251768112183,
      "rewards/margins": 0.25209158658981323,
      "rewards/rejected": 0.3206337094306946,
      "step": 10
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 60.527379497145056,
      "learning_rate": 9.867190271803463e-07,
      "logits/chosen": -2.4945449829101562,
      "logits/rejected": -2.4840779304504395,
      "logps/chosen": -235.19595336914062,
      "logps/rejected": -213.60946655273438,
      "loss": 0.5799,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": 1.5359665155410767,
      "rewards/margins": 0.8706293106079102,
      "rewards/rejected": 0.6653371453285217,
      "step": 15
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 53.828799786263204,
      "learning_rate": 9.475816456775312e-07,
      "logits/chosen": -2.4158647060394287,
      "logits/rejected": -2.3855679035186768,
      "logps/chosen": -268.0628662109375,
      "logps/rejected": -225.652587890625,
      "loss": 0.5684,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 1.9082000255584717,
      "rewards/margins": 1.4608235359191895,
      "rewards/rejected": 0.4473763406276703,
      "step": 20
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 34.458805166311905,
      "learning_rate": 8.846669854914395e-07,
      "logits/chosen": -2.2879459857940674,
      "logits/rejected": -2.2494328022003174,
      "logps/chosen": -231.61703491210938,
      "logps/rejected": -187.62875366210938,
      "loss": 0.5395,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 1.543787956237793,
      "rewards/margins": 1.2806603908538818,
      "rewards/rejected": 0.2631274163722992,
      "step": 25
    },
    {
      "epoch": 1.1214953271028036,
      "grad_norm": 27.103628504062815,
      "learning_rate": 8.013173181896282e-07,
      "logits/chosen": -2.277782917022705,
      "logits/rejected": -2.277600049972534,
      "logps/chosen": -259.5715637207031,
      "logps/rejected": -206.3133087158203,
      "loss": 0.3477,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": 2.0770156383514404,
      "rewards/margins": 2.2314352989196777,
      "rewards/rejected": -0.15441982448101044,
      "step": 30
    },
    {
      "epoch": 1.308411214953271,
      "grad_norm": 21.639384944433875,
      "learning_rate": 7.019605024359474e-07,
      "logits/chosen": -2.276589870452881,
      "logits/rejected": -2.2846901416778564,
      "logps/chosen": -261.1808166503906,
      "logps/rejected": -219.38858032226562,
      "loss": 0.2558,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 2.4890947341918945,
      "rewards/margins": 3.4098620414733887,
      "rewards/rejected": -0.9207670092582703,
      "step": 35
    },
    {
      "epoch": 1.4953271028037383,
      "grad_norm": 22.680670396225757,
      "learning_rate": 5.918747589082852e-07,
      "logits/chosen": -2.3969621658325195,
      "logits/rejected": -2.3342068195343018,
      "logps/chosen": -248.27743530273438,
      "logps/rejected": -212.25845336914062,
      "loss": 0.2736,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 2.3888657093048096,
      "rewards/margins": 3.1784424781799316,
      "rewards/rejected": -0.789576530456543,
      "step": 40
    },
    {
      "epoch": 1.6822429906542056,
      "grad_norm": 24.61733178022545,
      "learning_rate": 4.769082706771303e-07,
      "logits/chosen": -2.397773265838623,
      "logits/rejected": -2.4283571243286133,
      "logps/chosen": -246.1250762939453,
      "logps/rejected": -229.49514770507812,
      "loss": 0.2653,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 2.4959254264831543,
      "rewards/margins": 3.4453117847442627,
      "rewards/rejected": -0.9493860006332397,
      "step": 45
    },
    {
      "epoch": 1.8691588785046729,
      "grad_norm": 21.42871601886689,
      "learning_rate": 3.6316850496395855e-07,
      "logits/chosen": -2.4491772651672363,
      "logits/rejected": -2.4245429039001465,
      "logps/chosen": -275.09423828125,
      "logps/rejected": -236.69076538085938,
      "loss": 0.2912,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": 2.785498857498169,
      "rewards/margins": 3.59558367729187,
      "rewards/rejected": -0.8100848197937012,
      "step": 50
    },
    {
      "epoch": 1.8691588785046729,
      "eval_logits/chosen": -2.4286158084869385,
      "eval_logits/rejected": -2.4138076305389404,
      "eval_logps/chosen": -230.1865997314453,
      "eval_logps/rejected": -191.80255126953125,
      "eval_loss": 0.5428566336631775,
      "eval_rewards/accuracies": 0.78125,
      "eval_rewards/chosen": 1.8078041076660156,
      "eval_rewards/margins": 1.948243260383606,
      "eval_rewards/rejected": -0.14043934643268585,
      "eval_runtime": 50.1691,
      "eval_samples_per_second": 15.149,
      "eval_steps_per_second": 0.239,
      "step": 50
    },
    {
      "epoch": 2.05607476635514,
      "grad_norm": 19.534577835943573,
      "learning_rate": 2.566977607165719e-07,
      "logits/chosen": -2.450115442276001,
      "logits/rejected": -2.419285297393799,
      "logps/chosen": -247.16586303710938,
      "logps/rejected": -211.9401092529297,
      "loss": 0.2287,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 2.7968287467956543,
      "rewards/margins": 3.6083950996398926,
      "rewards/rejected": -0.8115667104721069,
      "step": 55
    },
    {
      "epoch": 2.2429906542056073,
      "grad_norm": 15.96174358986932,
      "learning_rate": 1.631521781767214e-07,
      "logits/chosen": -2.4297375679016113,
      "logits/rejected": -2.407179355621338,
      "logps/chosen": -232.9438018798828,
      "logps/rejected": -221.9869384765625,
      "loss": 0.1645,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 2.696375846862793,
      "rewards/margins": 3.9043846130371094,
      "rewards/rejected": -1.2080087661743164,
      "step": 60
    },
    {
      "epoch": 2.4299065420560746,
      "grad_norm": 16.25190830636716,
      "learning_rate": 8.75012627008489e-08,
      "logits/chosen": -2.460448741912842,
      "logits/rejected": -2.425128221511841,
      "logps/chosen": -258.5059814453125,
      "logps/rejected": -223.2895050048828,
      "loss": 0.1427,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 3.1485841274261475,
      "rewards/margins": 4.165754795074463,
      "rewards/rejected": -1.017170786857605,
      "step": 65
    },
    {
      "epoch": 2.616822429906542,
      "grad_norm": 15.19014195316505,
      "learning_rate": 3.376388529782215e-08,
      "logits/chosen": -2.4554247856140137,
      "logits/rejected": -2.421217679977417,
      "logps/chosen": -240.77627563476562,
      "logps/rejected": -223.79129028320312,
      "loss": 0.1737,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": 2.8354218006134033,
      "rewards/margins": 4.4457106590271,
      "rewards/rejected": -1.6102889776229858,
      "step": 70
    },
    {
      "epoch": 2.803738317757009,
      "grad_norm": 20.62451875265718,
      "learning_rate": 4.794784562397458e-09,
      "logits/chosen": -2.437767505645752,
      "logits/rejected": -2.4236741065979004,
      "logps/chosen": -251.58251953125,
      "logps/rejected": -232.46157836914062,
      "loss": 0.159,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 3.0037763118743896,
      "rewards/margins": 4.042423248291016,
      "rewards/rejected": -1.038646936416626,
      "step": 75
    },
    {
      "epoch": 2.9158878504672896,
      "step": 78,
      "total_flos": 919378820333568.0,
      "train_loss": 0.3483479917049408,
      "train_runtime": 3236.9548,
      "train_samples_per_second": 6.337,
      "train_steps_per_second": 0.024
    }
  ],
  "logging_steps": 5,
  "max_steps": 78,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 919378820333568.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}