gemma-2b-ForexAI/checkpoint-396/trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 396,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"grad_norm": 0.0012223966186866164,
"learning_rate": 1.8750000000000002e-05,
"logits/chosen": -22.664844512939453,
"logits/rejected": -22.80691909790039,
"logps/chosen": -81.01699829101562,
"logps/rejected": -101.25294494628906,
"loss": 0.2072,
"rewards/accuracies": 0.8846153616905212,
"rewards/chosen": 2.5555355548858643,
"rewards/margins": 4.414959907531738,
"rewards/rejected": -1.859424352645874,
"step": 26
},
{
"epoch": 0.39,
"grad_norm": 1.8405307855573483e-05,
"learning_rate": 2.9073033707865168e-05,
"logits/chosen": -23.12621307373047,
"logits/rejected": -23.24854278564453,
"logps/chosen": -43.78964614868164,
"logps/rejected": -156.3304901123047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.3214497566223145,
"rewards/margins": 13.642704010009766,
"rewards/rejected": -7.321253776550293,
"step": 52
},
{
"epoch": 0.59,
"grad_norm": 1.5625999367330223e-05,
"learning_rate": 2.6882022471910113e-05,
"logits/chosen": -23.210811614990234,
"logits/rejected": -23.32987403869629,
"logps/chosen": -41.96815490722656,
"logps/rejected": -167.406982421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.477440357208252,
"rewards/margins": 14.927824020385742,
"rewards/rejected": -8.450382232666016,
"step": 78
},
{
"epoch": 0.79,
"grad_norm": 1.5885076209087856e-05,
"learning_rate": 2.4691011235955056e-05,
"logits/chosen": -23.275333404541016,
"logits/rejected": -23.39052391052246,
"logps/chosen": -41.763607025146484,
"logps/rejected": -169.25025939941406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.4945831298828125,
"rewards/margins": 15.126973152160645,
"rewards/rejected": -8.632390975952148,
"step": 104
},
{
"epoch": 0.98,
"grad_norm": 0.00013870897237211466,
"learning_rate": 2.25e-05,
"logits/chosen": -23.342487335205078,
"logits/rejected": -23.45945167541504,
"logps/chosen": -41.83483123779297,
"logps/rejected": -169.22845458984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.479532718658447,
"rewards/margins": 15.119216918945312,
"rewards/rejected": -8.63968276977539,
"step": 130
},
{
"epoch": 1.18,
"grad_norm": 1.5738529327791184e-05,
"learning_rate": 2.0308988764044947e-05,
"logits/chosen": -23.253267288208008,
"logits/rejected": -23.370222091674805,
"logps/chosen": -41.68398666381836,
"logps/rejected": -169.04989624023438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.4891557693481445,
"rewards/margins": 15.119135856628418,
"rewards/rejected": -8.629980087280273,
"step": 156
},
{
"epoch": 1.38,
"grad_norm": 1.4681028005725238e-05,
"learning_rate": 1.8117977528089886e-05,
"logits/chosen": -23.281639099121094,
"logits/rejected": -23.397907257080078,
"logps/chosen": -41.55263137817383,
"logps/rejected": -170.5806427001953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.5376057624816895,
"rewards/margins": 15.297250747680664,
"rewards/rejected": -8.759647369384766,
"step": 182
},
{
"epoch": 1.58,
"grad_norm": 1.823231104935985e-05,
"learning_rate": 1.5926966292134832e-05,
"logits/chosen": -23.312273025512695,
"logits/rejected": -23.43006706237793,
"logps/chosen": -42.09364318847656,
"logps/rejected": -168.72459411621094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.430633544921875,
"rewards/margins": 15.028059959411621,
"rewards/rejected": -8.597426414489746,
"step": 208
},
{
"epoch": 1.77,
"grad_norm": 1.3677333299710881e-05,
"learning_rate": 1.3735955056179776e-05,
"logits/chosen": -23.281251907348633,
"logits/rejected": -23.39859390258789,
"logps/chosen": -41.60188293457031,
"logps/rejected": -169.98341369628906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.554170608520508,
"rewards/margins": 15.241110801696777,
"rewards/rejected": -8.686941146850586,
"step": 234
},
{
"epoch": 1.97,
"grad_norm": 1.329195401922334e-05,
"learning_rate": 1.154494382022472e-05,
"logits/chosen": -23.316593170166016,
"logits/rejected": -23.435791015625,
"logps/chosen": -41.78284454345703,
"logps/rejected": -169.241943359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.4690632820129395,
"rewards/margins": 15.107036590576172,
"rewards/rejected": -8.63797378540039,
"step": 260
},
{
"epoch": 2.17,
"grad_norm": 1.4401819498743862e-05,
"learning_rate": 9.353932584269662e-06,
"logits/chosen": -23.297456741333008,
"logits/rejected": -23.411481857299805,
"logps/chosen": -41.401039123535156,
"logps/rejected": -169.64952087402344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.529332160949707,
"rewards/margins": 15.224197387695312,
"rewards/rejected": -8.694866180419922,
"step": 286
},
{
"epoch": 2.36,
"grad_norm": 1.4643008398707025e-05,
"learning_rate": 7.162921348314607e-06,
"logits/chosen": -23.277320861816406,
"logits/rejected": -23.393136978149414,
"logps/chosen": -41.65689468383789,
"logps/rejected": -171.18663024902344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.522489070892334,
"rewards/margins": 15.33745002746582,
"rewards/rejected": -8.814961433410645,
"step": 312
},
{
"epoch": 2.56,
"grad_norm": 0.00012409774353727698,
"learning_rate": 4.97191011235955e-06,
"logits/chosen": -23.303037643432617,
"logits/rejected": -23.417268753051758,
"logps/chosen": -41.39340591430664,
"logps/rejected": -170.94407653808594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.569211483001709,
"rewards/margins": 15.359162330627441,
"rewards/rejected": -8.789949417114258,
"step": 338
},
{
"epoch": 2.76,
"grad_norm": 1.4201951671566349e-05,
"learning_rate": 2.7808988764044947e-06,
"logits/chosen": -23.281291961669922,
"logits/rejected": -23.399757385253906,
"logps/chosen": -41.88224792480469,
"logps/rejected": -169.70294189453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.4672112464904785,
"rewards/margins": 15.137907028198242,
"rewards/rejected": -8.670695304870605,
"step": 364
},
{
"epoch": 2.95,
"grad_norm": 1.3474539628077764e-05,
"learning_rate": 5.898876404494382e-07,
"logits/chosen": -23.33159065246582,
"logits/rejected": -23.45261573791504,
"logps/chosen": -41.745670318603516,
"logps/rejected": -169.70147705078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.496582984924316,
"rewards/margins": 15.183600425720215,
"rewards/rejected": -8.687018394470215,
"step": 390
}
],
"logging_steps": 26,
"max_steps": 396,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
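The `log_history` above records per-interval metrics (loss, learning_rate, logps/*, rewards/chosen, rewards/rejected, rewards/margins, rewards/accuracies) every 26 steps across 3 epochs; the reward-style keys suggest a TRL DPO-style run, though the trainer is not named in the file itself. Below is a minimal sketch of how this file could be read back for inspection. It assumes only the path shown in the header and an installed pandas; it is not part of the uploaded checkpoint.

```python
import json

import pandas as pd

# Load the trainer state written by the Hugging Face Trainer for this checkpoint.
with open("checkpoint-396/trainer_state.json") as f:
    state = json.load(f)

# Each entry in log_history is one logging event (logging_steps = 26 here).
df = pd.DataFrame(state["log_history"])

# Inspect how the reward margin and preference accuracy evolve over training.
print(df[["epoch", "step", "loss", "rewards/accuracies", "rewards/margins"]])
```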