error577's picture
Training in progress, step 176, checkpoint
1706659 verified
raw
history blame
8.08 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.2987481434330575,
"eval_steps": 32,
"global_step": 176,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001697432633142372,
"eval_loss": 2.4177019596099854,
"eval_runtime": 13.7299,
"eval_samples_per_second": 18.136,
"eval_steps_per_second": 18.136,
"step": 1
},
{
"epoch": 0.008487163165711862,
"grad_norm": 0.5580189228057861,
"learning_rate": 5e-06,
"loss": 2.0835,
"step": 5
},
{
"epoch": 0.016974326331423723,
"grad_norm": 0.5617932081222534,
"learning_rate": 1e-05,
"loss": 2.3638,
"step": 10
},
{
"epoch": 0.025461489497135583,
"grad_norm": 0.637174129486084,
"learning_rate": 9.99743108100344e-06,
"loss": 2.3443,
"step": 15
},
{
"epoch": 0.033948652662847446,
"grad_norm": 0.7906777858734131,
"learning_rate": 9.989726963751683e-06,
"loss": 2.4875,
"step": 20
},
{
"epoch": 0.042435815828559306,
"grad_norm": 0.7220119833946228,
"learning_rate": 9.976895564745993e-06,
"loss": 2.2905,
"step": 25
},
{
"epoch": 0.050922978994271166,
"grad_norm": 0.4569860100746155,
"learning_rate": 9.95895006911623e-06,
"loss": 2.8207,
"step": 30
},
{
"epoch": 0.05431784426055591,
"eval_loss": 2.396263599395752,
"eval_runtime": 13.7534,
"eval_samples_per_second": 18.105,
"eval_steps_per_second": 18.105,
"step": 32
},
{
"epoch": 0.059410142159983026,
"grad_norm": 0.5562223196029663,
"learning_rate": 9.935908917072253e-06,
"loss": 2.3774,
"step": 35
},
{
"epoch": 0.06789730532569489,
"grad_norm": 0.8851813077926636,
"learning_rate": 9.907795784955327e-06,
"loss": 2.3059,
"step": 40
},
{
"epoch": 0.07638446849140675,
"grad_norm": 0.8263425827026367,
"learning_rate": 9.874639560909118e-06,
"loss": 2.2858,
"step": 45
},
{
"epoch": 0.08487163165711861,
"grad_norm": 0.9496198296546936,
"learning_rate": 9.836474315195148e-06,
"loss": 2.2606,
"step": 50
},
{
"epoch": 0.09335879482283047,
"grad_norm": 0.8389888405799866,
"learning_rate": 9.793339265183303e-06,
"loss": 2.4757,
"step": 55
},
{
"epoch": 0.10184595798854233,
"grad_norm": 0.9090803861618042,
"learning_rate": 9.745278735053345e-06,
"loss": 2.2428,
"step": 60
},
{
"epoch": 0.10863568852111181,
"eval_loss": 2.3315982818603516,
"eval_runtime": 13.995,
"eval_samples_per_second": 17.792,
"eval_steps_per_second": 17.792,
"step": 64
},
{
"epoch": 0.11033312115425419,
"grad_norm": 0.8944710493087769,
"learning_rate": 9.692342110248802e-06,
"loss": 2.361,
"step": 65
},
{
"epoch": 0.11882028431996605,
"grad_norm": 0.8705277442932129,
"learning_rate": 9.63458378673011e-06,
"loss": 2.2061,
"step": 70
},
{
"epoch": 0.1273074474856779,
"grad_norm": 1.0183981657028198,
"learning_rate": 9.572063115079063e-06,
"loss": 2.3014,
"step": 75
},
{
"epoch": 0.13579461065138979,
"grad_norm": 0.9694010615348816,
"learning_rate": 9.504844339512096e-06,
"loss": 2.4273,
"step": 80
},
{
"epoch": 0.14428177381710164,
"grad_norm": 0.6600094437599182,
"learning_rate": 9.432996531865001e-06,
"loss": 2.2039,
"step": 85
},
{
"epoch": 0.1527689369828135,
"grad_norm": 1.437016487121582,
"learning_rate": 9.356593520616948e-06,
"loss": 2.4129,
"step": 90
},
{
"epoch": 0.16125610014852534,
"grad_norm": 1.1358604431152344,
"learning_rate": 9.275713815026732e-06,
"loss": 2.2346,
"step": 95
},
{
"epoch": 0.16295353278166771,
"eval_loss": 2.2801592350006104,
"eval_runtime": 14.3531,
"eval_samples_per_second": 17.348,
"eval_steps_per_second": 17.348,
"step": 96
},
{
"epoch": 0.16974326331423722,
"grad_norm": 0.8347494006156921,
"learning_rate": 9.190440524459203e-06,
"loss": 2.5003,
"step": 100
},
{
"epoch": 0.17823042647994908,
"grad_norm": 0.9528422355651855,
"learning_rate": 9.10086127298478e-06,
"loss": 2.2398,
"step": 105
},
{
"epoch": 0.18671758964566093,
"grad_norm": 0.7451781630516052,
"learning_rate": 9.007068109339783e-06,
"loss": 2.253,
"step": 110
},
{
"epoch": 0.1952047528113728,
"grad_norm": 0.6891763210296631,
"learning_rate": 8.90915741234015e-06,
"loss": 2.0703,
"step": 115
},
{
"epoch": 0.20369191597708466,
"grad_norm": 0.7363041639328003,
"learning_rate": 8.807229791845673e-06,
"loss": 2.3083,
"step": 120
},
{
"epoch": 0.21217907914279652,
"grad_norm": 0.7747501730918884,
"learning_rate": 8.701389985376578e-06,
"loss": 2.2058,
"step": 125
},
{
"epoch": 0.21727137704222363,
"eval_loss": 2.245945692062378,
"eval_runtime": 14.3131,
"eval_samples_per_second": 17.397,
"eval_steps_per_second": 17.397,
"step": 128
},
{
"epoch": 0.22066624230850837,
"grad_norm": 0.8016952276229858,
"learning_rate": 8.591746750488639e-06,
"loss": 2.1023,
"step": 130
},
{
"epoch": 0.22915340547422025,
"grad_norm": 1.4869773387908936,
"learning_rate": 8.478412753017433e-06,
"loss": 2.2433,
"step": 135
},
{
"epoch": 0.2376405686399321,
"grad_norm": 1.111100673675537,
"learning_rate": 8.361504451306585e-06,
"loss": 2.0758,
"step": 140
},
{
"epoch": 0.24612773180564396,
"grad_norm": 0.7958012819290161,
"learning_rate": 8.241141976538944e-06,
"loss": 2.1178,
"step": 145
},
{
"epoch": 0.2546148949713558,
"grad_norm": 0.6610277891159058,
"learning_rate": 8.117449009293668e-06,
"loss": 2.0289,
"step": 150
},
{
"epoch": 0.26310205813706766,
"grad_norm": 0.9238812923431396,
"learning_rate": 7.99055265245608e-06,
"loss": 2.3871,
"step": 155
},
{
"epoch": 0.27158922130277957,
"grad_norm": 0.9540156126022339,
"learning_rate": 7.860583300610849e-06,
"loss": 2.069,
"step": 160
},
{
"epoch": 0.27158922130277957,
"eval_loss": 2.2213292121887207,
"eval_runtime": 14.5462,
"eval_samples_per_second": 17.118,
"eval_steps_per_second": 17.118,
"step": 160
},
{
"epoch": 0.2800763844684914,
"grad_norm": 1.1984089612960815,
"learning_rate": 7.727674506052744e-06,
"loss": 1.9745,
"step": 165
},
{
"epoch": 0.2885635476342033,
"grad_norm": 0.8632123470306396,
"learning_rate": 7.591962841552627e-06,
"loss": 1.9814,
"step": 170
},
{
"epoch": 0.29705071079991513,
"grad_norm": 0.6884729266166687,
"learning_rate": 7.453587760019691e-06,
"loss": 2.1216,
"step": 175
}
],
"logging_steps": 5,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 16,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5596099588915200.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}