|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.322226629644321, |
|
"eval_steps": 1000, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06611133148221605, |
|
"grad_norm": 1.0232534408569336, |
|
"learning_rate": 4.4080049369655294e-05, |
|
"loss": 5.6838, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1322226629644321, |
|
"grad_norm": 1.0166140794754028, |
|
"learning_rate": 8.816009873931059e-05, |
|
"loss": 3.8378, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1322226629644321, |
|
"eval_accuracy": 0.4720710052887577, |
|
"eval_loss": 3.6431853771209717, |
|
"eval_runtime": 65.5904, |
|
"eval_samples_per_second": 28.053, |
|
"eval_steps_per_second": 1.174, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19833399444664815, |
|
"grad_norm": 1.0467888116836548, |
|
"learning_rate": 9.830315009952811e-05, |
|
"loss": 3.3712, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2644453259288642, |
|
"grad_norm": 1.1263777017593384, |
|
"learning_rate": 9.59831475011252e-05, |
|
"loss": 3.0922, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2644453259288642, |
|
"eval_accuracy": 0.5138609524011809, |
|
"eval_loss": 3.076597213745117, |
|
"eval_runtime": 64.6228, |
|
"eval_samples_per_second": 28.473, |
|
"eval_steps_per_second": 1.192, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.33055665741108026, |
|
"grad_norm": 1.4438892602920532, |
|
"learning_rate": 9.366314490272228e-05, |
|
"loss": 2.9066, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3966679888932963, |
|
"grad_norm": 1.3693314790725708, |
|
"learning_rate": 9.134314230431938e-05, |
|
"loss": 2.7993, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3966679888932963, |
|
"eval_accuracy": 0.5319845054268176, |
|
"eval_loss": 2.84745454788208, |
|
"eval_runtime": 64.9029, |
|
"eval_samples_per_second": 28.35, |
|
"eval_steps_per_second": 1.186, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.46277932037551234, |
|
"grad_norm": 1.3279718160629272, |
|
"learning_rate": 8.902313970591646e-05, |
|
"loss": 2.7166, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.5288906518577284, |
|
"grad_norm": 1.465155839920044, |
|
"learning_rate": 8.670313710751356e-05, |
|
"loss": 2.7115, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5288906518577284, |
|
"eval_accuracy": 0.5392130052462777, |
|
"eval_loss": 2.7528512477874756, |
|
"eval_runtime": 65.039, |
|
"eval_samples_per_second": 28.291, |
|
"eval_steps_per_second": 1.184, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5950019833399445, |
|
"grad_norm": 2.4618444442749023, |
|
"learning_rate": 8.438313450911065e-05, |
|
"loss": 2.644, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6611133148221605, |
|
"grad_norm": 3.049086093902588, |
|
"learning_rate": 8.206313191070773e-05, |
|
"loss": 2.6702, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6611133148221605, |
|
"eval_accuracy": 0.5420291625071685, |
|
"eval_loss": 2.7150135040283203, |
|
"eval_runtime": 64.9223, |
|
"eval_samples_per_second": 28.342, |
|
"eval_steps_per_second": 1.186, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7272246463043766, |
|
"grad_norm": 3.927698850631714, |
|
"learning_rate": 7.974312931230483e-05, |
|
"loss": 2.6029, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7933359777865926, |
|
"grad_norm": 4.909026622772217, |
|
"learning_rate": 7.742312671390191e-05, |
|
"loss": 2.6484, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7933359777865926, |
|
"eval_accuracy": 0.543187538497483, |
|
"eval_loss": 2.696218729019165, |
|
"eval_runtime": 64.8633, |
|
"eval_samples_per_second": 28.367, |
|
"eval_steps_per_second": 1.187, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.8594473092688086, |
|
"grad_norm": 10.72818660736084, |
|
"learning_rate": 7.510312411549901e-05, |
|
"loss": 2.6474, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.9255586407510247, |
|
"grad_norm": 12.435935020446777, |
|
"learning_rate": 7.278312151709609e-05, |
|
"loss": 2.6419, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9255586407510247, |
|
"eval_accuracy": 0.5387701514411334, |
|
"eval_loss": 2.7223353385925293, |
|
"eval_runtime": 68.3123, |
|
"eval_samples_per_second": 26.935, |
|
"eval_steps_per_second": 1.127, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9916699722332408, |
|
"grad_norm": 15.605013847351074, |
|
"learning_rate": 7.046311891869319e-05, |
|
"loss": 2.6239, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.0577813037154569, |
|
"grad_norm": 55.199256896972656, |
|
"learning_rate": 6.814311632029027e-05, |
|
"loss": 2.5853, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.0577813037154569, |
|
"eval_accuracy": 0.5401743803232727, |
|
"eval_loss": 2.7088677883148193, |
|
"eval_runtime": 66.8855, |
|
"eval_samples_per_second": 27.51, |
|
"eval_steps_per_second": 1.151, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.1238926351976728, |
|
"grad_norm": 19.066770553588867, |
|
"learning_rate": 6.582311372188736e-05, |
|
"loss": 2.616, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.190003966679889, |
|
"grad_norm": 25.54907989501953, |
|
"learning_rate": 6.350311112348446e-05, |
|
"loss": 2.6009, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.190003966679889, |
|
"eval_accuracy": 0.5401903103162634, |
|
"eval_loss": 2.703549861907959, |
|
"eval_runtime": 65.0156, |
|
"eval_samples_per_second": 28.301, |
|
"eval_steps_per_second": 1.184, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.256115298162105, |
|
"grad_norm": 24.64689826965332, |
|
"learning_rate": 6.118310852508154e-05, |
|
"loss": 2.622, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.322226629644321, |
|
"grad_norm": 39.75895309448242, |
|
"learning_rate": 5.886310592667864e-05, |
|
"loss": 2.6347, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.322226629644321, |
|
"eval_accuracy": 0.5368293472950872, |
|
"eval_loss": 2.7321841716766357, |
|
"eval_runtime": 70.4109, |
|
"eval_samples_per_second": 26.132, |
|
"eval_steps_per_second": 1.094, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 22689, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.406996333009306e+16, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|