llama3-70b-lora-alpaca-11-v1 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 68,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014705882352941176,
"grad_norm": 0.29233628511428833,
"learning_rate": 2.857142857142857e-05,
"loss": 1.8412,
"step": 1
},
{
"epoch": 0.07352941176470588,
"grad_norm": 0.21557892858982086,
"learning_rate": 0.00014285714285714287,
"loss": 1.8158,
"step": 5
},
{
"epoch": 0.14705882352941177,
"grad_norm": 0.27912023663520813,
"learning_rate": 0.00019880878960910772,
"loss": 1.7702,
"step": 10
},
{
"epoch": 0.22058823529411764,
"grad_norm": 0.19626685976982117,
"learning_rate": 0.0001916316904487005,
"loss": 1.6706,
"step": 15
},
{
"epoch": 0.29411764705882354,
"grad_norm": 0.15178152918815613,
"learning_rate": 0.00017841198065767107,
"loss": 1.5842,
"step": 20
},
{
"epoch": 0.36764705882352944,
"grad_norm": 0.10677345097064972,
"learning_rate": 0.00016002142805483685,
"loss": 1.5496,
"step": 25
},
{
"epoch": 0.4411764705882353,
"grad_norm": 0.08041682839393616,
"learning_rate": 0.00013767278936351854,
"loss": 1.5281,
"step": 30
},
{
"epoch": 0.5147058823529411,
"grad_norm": 0.07561662793159485,
"learning_rate": 0.00011283983551465511,
"loss": 1.5326,
"step": 35
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.09348361939191818,
"learning_rate": 8.71601644853449e-05,
"loss": 1.5256,
"step": 40
},
{
"epoch": 0.6617647058823529,
"grad_norm": 0.08627992123365402,
"learning_rate": 6.232721063648148e-05,
"loss": 1.5018,
"step": 45
},
{
"epoch": 0.7352941176470589,
"grad_norm": 0.09307563304901123,
"learning_rate": 3.997857194516319e-05,
"loss": 1.526,
"step": 50
},
{
"epoch": 0.8088235294117647,
"grad_norm": 0.08465312421321869,
"learning_rate": 2.1588019342328968e-05,
"loss": 1.495,
"step": 55
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.08478015661239624,
"learning_rate": 8.368309551299536e-06,
"loss": 1.4859,
"step": 60
},
{
"epoch": 0.9558823529411765,
"grad_norm": 0.07574094086885452,
"learning_rate": 1.1912103908922945e-06,
"loss": 1.4916,
"step": 65
},
{
"epoch": 1.0,
"eval_loss": 1.4919378757476807,
"eval_runtime": 11.3628,
"eval_samples_per_second": 15.137,
"eval_steps_per_second": 0.176,
"step": 68
},
{
"epoch": 1.0,
"step": 68,
"total_flos": 4.647146244454482e+17,
"train_loss": 1.5711571276187897,
"train_runtime": 2260.7477,
"train_samples_per_second": 7.666,
"train_steps_per_second": 0.03
}
],
"logging_steps": 5,
"max_steps": 68,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.647146244454482e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
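
Note: the state above is the standard trainer_state.json that the Hugging Face Trainer writes next to a checkpoint. Its "log_history" list holds one record per logging interval (logging_steps is 5 here), followed by a final evaluation record and a run summary at step 68. The sketch below is a minimal, non-authoritative example of reading that history and plotting the training-loss curve; it assumes the file is saved locally as "trainer_state.json" and that matplotlib is installed, neither of which is implied by the file itself.

# Minimal sketch (not part of the original file): plot training loss from log_history.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:  # assumed local path
    state = json.load(f)

# Keep only the periodic training records (they carry a "loss" key);
# the eval entry uses "eval_loss" and the summary uses "train_loss", so both are skipped.
train_logs = [e for e in state["log_history"] if "loss" in e and "step" in e]

steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]

plt.plot(steps, losses, marker="o")
plt.xlabel("global step")
plt.ylabel("training loss")
plt.title("llama3-70b-lora-alpaca-11-v1: loss over 1 epoch (68 steps)")
plt.savefig("loss_curve.png")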