{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 2339,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04275331338178709,
      "grad_norm": 0.15714330971240997,
      "learning_rate": 6.666666666666667e-06,
      "loss": 2.323,
      "step": 100
    },
    {
      "epoch": 0.08550662676357418,
      "grad_norm": 0.22236719727516174,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 2.2818,
      "step": 200
    },
    {
      "epoch": 0.12825994014536127,
      "grad_norm": 0.4057689309120178,
      "learning_rate": 2e-05,
      "loss": 2.1665,
      "step": 300
    },
    {
      "epoch": 0.17101325352714836,
      "grad_norm": 0.6240995526313782,
      "learning_rate": 1.9881538840448035e-05,
      "loss": 2.0323,
      "step": 400
    },
    {
      "epoch": 0.21376656690893545,
      "grad_norm": 1.0177165269851685,
      "learning_rate": 1.9528961971056615e-05,
      "loss": 1.9214,
      "step": 500
    },
    {
      "epoch": 0.25651988029072254,
      "grad_norm": 0.8750305771827698,
      "learning_rate": 1.8950622724781605e-05,
      "loss": 1.8745,
      "step": 600
    },
    {
      "epoch": 0.2992731936725096,
      "grad_norm": 1.0784953832626343,
      "learning_rate": 1.816022324916863e-05,
      "loss": 1.8557,
      "step": 700
    },
    {
      "epoch": 0.3420265070542967,
      "grad_norm": 1.0847351551055908,
      "learning_rate": 1.717648987189577e-05,
      "loss": 1.7955,
      "step": 800
    },
    {
      "epoch": 0.3847798204360838,
      "grad_norm": 1.1327402591705322,
      "learning_rate": 1.6022729432275364e-05,
      "loss": 1.7167,
      "step": 900
    },
    {
      "epoch": 0.4275331338178709,
      "grad_norm": 1.1066670417785645,
      "learning_rate": 1.4726277090211945e-05,
      "loss": 1.6959,
      "step": 1000
    },
    {
      "epoch": 0.47028644719965795,
      "grad_norm": 1.1382914781570435,
      "learning_rate": 1.3317848695254441e-05,
      "loss": 1.634,
      "step": 1100
    },
    {
      "epoch": 0.5130397605814451,
      "grad_norm": 1.3232320547103882,
      "learning_rate": 1.1830813059565374e-05,
      "loss": 1.6383,
      "step": 1200
    },
    {
      "epoch": 0.5557930739632322,
      "grad_norm": 1.2707828283309937,
      "learning_rate": 1.0300401376284509e-05,
      "loss": 1.662,
      "step": 1300
    },
    {
      "epoch": 0.5985463873450192,
      "grad_norm": 1.1388013362884521,
      "learning_rate": 8.762872513930507e-06,
      "loss": 1.6272,
      "step": 1400
    },
    {
      "epoch": 0.6412997007268063,
      "grad_norm": 1.339849829673767,
      "learning_rate": 7.254653962879187e-06,
      "loss": 1.6351,
      "step": 1500
    },
    {
      "epoch": 0.6840530141085934,
      "grad_norm": 1.3893671035766602,
      "learning_rate": 5.8114787868136125e-06,
      "loss": 1.5628,
      "step": 1600
    },
    {
      "epoch": 0.7268063274903805,
      "grad_norm": 1.172720193862915,
      "learning_rate": 4.4675390266924536e-06,
      "loss": 1.6123,
      "step": 1700
    },
    {
      "epoch": 0.7695596408721675,
      "grad_norm": 1.1993151903152466,
      "learning_rate": 3.2546756149860935e-06,
      "loss": 1.5979,
      "step": 1800
    },
    {
      "epoch": 0.8123129542539547,
      "grad_norm": 1.6297610998153687,
      "learning_rate": 2.2016239929203174e-06,
      "loss": 1.6089,
      "step": 1900
    },
    {
      "epoch": 0.8550662676357418,
      "grad_norm": 1.2422642707824707,
      "learning_rate": 1.3333333037387176e-06,
      "loss": 1.5803,
      "step": 2000
    },
    {
      "epoch": 0.8978195810175289,
      "grad_norm": 1.5311039686203003,
      "learning_rate": 6.703752918150241e-07,
      "loss": 1.6131,
      "step": 2100
    },
    {
      "epoch": 0.9405728943993159,
      "grad_norm": 1.3912514448165894,
      "learning_rate": 2.2845691211458298e-07,
      "loss": 1.5255,
      "step": 2200
    },
    {
      "epoch": 0.983326207781103,
      "grad_norm": 1.408026099205017,
      "learning_rate": 1.8048197374724852e-08,
      "loss": 1.5973,
      "step": 2300
    },
    {
      "epoch": 1.0,
      "step": 2339,
      "total_flos": 4.259518857216e+16,
      "train_loss": 1.7606283989458218,
      "train_runtime": 1228.9387,
      "train_samples_per_second": 3.806,
      "train_steps_per_second": 1.903
    }
  ],
  "logging_steps": 100,
  "max_steps": 2339,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.259518857216e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}