cnn_10k_100 / checkpoint-2497 /trainer_state.json
xihajun's picture
Upload folder using huggingface_hub
614ef3c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 96.97087378640776,
"eval_steps": 500,
"global_step": 2497,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.15,
"learning_rate": 0.000324,
"loss": 1.6248,
"step": 81
},
{
"epoch": 6.29,
"learning_rate": 0.000648,
"loss": 1.5109,
"step": 162
},
{
"epoch": 9.44,
"learning_rate": 0.000972,
"loss": 1.4155,
"step": 243
},
{
"epoch": 12.58,
"learning_rate": 0.0009671111111111112,
"loss": 1.328,
"step": 324
},
{
"epoch": 15.73,
"learning_rate": 0.0009311111111111112,
"loss": 1.2665,
"step": 405
},
{
"epoch": 18.87,
"learning_rate": 0.0008951111111111111,
"loss": 1.2178,
"step": 486
},
{
"epoch": 22.02,
"learning_rate": 0.0008591111111111112,
"loss": 1.1829,
"step": 567
},
{
"epoch": 25.17,
"learning_rate": 0.0008231111111111112,
"loss": 1.1523,
"step": 648
},
{
"epoch": 28.31,
"learning_rate": 0.0007871111111111111,
"loss": 1.1296,
"step": 729
},
{
"epoch": 31.46,
"learning_rate": 0.000751111111111111,
"loss": 1.1084,
"step": 810
},
{
"epoch": 34.6,
"learning_rate": 0.0007151111111111111,
"loss": 1.0855,
"step": 891
},
{
"epoch": 37.75,
"learning_rate": 0.0006791111111111111,
"loss": 1.0708,
"step": 972
},
{
"epoch": 40.89,
"learning_rate": 0.0006431111111111111,
"loss": 1.0536,
"step": 1053
},
{
"epoch": 44.04,
"learning_rate": 0.0006071111111111112,
"loss": 1.0359,
"step": 1134
},
{
"epoch": 47.18,
"learning_rate": 0.0005711111111111111,
"loss": 1.0246,
"step": 1215
},
{
"epoch": 50.33,
"learning_rate": 0.0005351111111111111,
"loss": 1.0132,
"step": 1296
},
{
"epoch": 53.48,
"learning_rate": 0.0004991111111111111,
"loss": 1.0013,
"step": 1377
},
{
"epoch": 56.62,
"learning_rate": 0.0004631111111111111,
"loss": 0.9878,
"step": 1458
},
{
"epoch": 59.77,
"learning_rate": 0.0004271111111111111,
"loss": 0.9766,
"step": 1539
},
{
"epoch": 62.91,
"learning_rate": 0.0003911111111111111,
"loss": 0.9643,
"step": 1620
},
{
"epoch": 66.06,
"learning_rate": 0.0003551111111111111,
"loss": 0.9538,
"step": 1701
},
{
"epoch": 69.2,
"learning_rate": 0.0003191111111111111,
"loss": 0.9486,
"step": 1782
},
{
"epoch": 72.35,
"learning_rate": 0.0002831111111111111,
"loss": 0.9382,
"step": 1863
},
{
"epoch": 75.5,
"learning_rate": 0.00024711111111111114,
"loss": 0.9255,
"step": 1944
},
{
"epoch": 78.64,
"learning_rate": 0.0002111111111111111,
"loss": 0.9153,
"step": 2025
},
{
"epoch": 81.79,
"learning_rate": 0.0001751111111111111,
"loss": 0.9069,
"step": 2106
},
{
"epoch": 84.93,
"learning_rate": 0.0001391111111111111,
"loss": 0.8996,
"step": 2187
},
{
"epoch": 88.08,
"learning_rate": 0.00010311111111111111,
"loss": 0.888,
"step": 2268
},
{
"epoch": 91.22,
"learning_rate": 6.71111111111111e-05,
"loss": 0.8791,
"step": 2349
},
{
"epoch": 94.37,
"learning_rate": 3.111111111111111e-05,
"loss": 0.8721,
"step": 2430
}
],
"logging_steps": 81,
"max_steps": 2500,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 2.7961865832310505e+19,
"trial_name": null,
"trial_params": null
}