|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.04595060310166571, |
|
"eval_steps": 5, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022975301550832855, |
|
"grad_norm": 52.58608627319336, |
|
"learning_rate": 1e-05, |
|
"loss": 128.675, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0022975301550832855, |
|
"eval_loss": 8.092684745788574, |
|
"eval_runtime": 39.0855, |
|
"eval_samples_per_second": 18.779, |
|
"eval_steps_per_second": 9.39, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004595060310166571, |
|
"grad_norm": 56.0618782043457, |
|
"learning_rate": 2e-05, |
|
"loss": 129.3595, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0068925904652498565, |
|
"grad_norm": 52.7619743347168, |
|
"learning_rate": 3e-05, |
|
"loss": 137.0491, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009190120620333142, |
|
"grad_norm": 53.4531135559082, |
|
"learning_rate": 4e-05, |
|
"loss": 127.1661, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.011487650775416428, |
|
"grad_norm": 51.779945373535156, |
|
"learning_rate": 5e-05, |
|
"loss": 125.7169, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.011487650775416428, |
|
"eval_loss": 7.7418437004089355, |
|
"eval_runtime": 16.6436, |
|
"eval_samples_per_second": 44.101, |
|
"eval_steps_per_second": 22.051, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.013785180930499713, |
|
"grad_norm": 58.281494140625, |
|
"learning_rate": 6e-05, |
|
"loss": 126.5243, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.016082711085583, |
|
"grad_norm": 65.08502960205078, |
|
"learning_rate": 7e-05, |
|
"loss": 126.5224, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.018380241240666284, |
|
"grad_norm": 73.16618347167969, |
|
"learning_rate": 8e-05, |
|
"loss": 124.1406, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02067777139574957, |
|
"grad_norm": 83.0838394165039, |
|
"learning_rate": 9e-05, |
|
"loss": 123.3417, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.022975301550832855, |
|
"grad_norm": 91.16650390625, |
|
"learning_rate": 0.0001, |
|
"loss": 115.9991, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.022975301550832855, |
|
"eval_loss": 6.1880202293396, |
|
"eval_runtime": 16.5947, |
|
"eval_samples_per_second": 44.231, |
|
"eval_steps_per_second": 22.116, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02527283170591614, |
|
"grad_norm": 95.79916381835938, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 103.629, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.027570361860999426, |
|
"grad_norm": 95.17044067382812, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 93.1802, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02986789201608271, |
|
"grad_norm": 86.8368911743164, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 83.5648, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.032165422171166, |
|
"grad_norm": 81.78367614746094, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 73.0299, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03446295232624928, |
|
"grad_norm": 77.22337341308594, |
|
"learning_rate": 5e-05, |
|
"loss": 65.4569, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03446295232624928, |
|
"eval_loss": 3.7689990997314453, |
|
"eval_runtime": 16.5689, |
|
"eval_samples_per_second": 44.3, |
|
"eval_steps_per_second": 22.15, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03676048248133257, |
|
"grad_norm": 69.29478454589844, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 63.1268, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.039058012636415854, |
|
"grad_norm": 70.21276092529297, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 56.9275, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04135554279149914, |
|
"grad_norm": 66.2811279296875, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 56.5232, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.043653072946582425, |
|
"grad_norm": 64.68504333496094, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 54.7725, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04595060310166571, |
|
"grad_norm": 59.0147705078125, |
|
"learning_rate": 0.0, |
|
"loss": 54.5287, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04595060310166571, |
|
"eval_loss": 3.296483278274536, |
|
"eval_runtime": 16.5931, |
|
"eval_samples_per_second": 44.235, |
|
"eval_steps_per_second": 22.118, |
|
"step": 20 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9317226783965184.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|