{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.09592326139088729,
"eval_steps": 500,
"global_step": 40,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00013,
"loss": 1.1241,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 0.00026,
"loss": 1.0107,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 0.00039,
"loss": 1.1086,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 0.00052,
"loss": 1.0044,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 0.00065,
"loss": 1.0496,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 0.0005933661039639299,
"loss": 1.0199,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 0.0005493502655735357,
"loss": 1.0198,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 0.0005138701197773616,
"loss": 0.969,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 0.0004844813951249544,
"loss": 0.9383,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 0.0004596194077712558,
"loss": 0.8776,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 0.0004382299106011073,
"loss": 1.0173,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 0.0004195731958391368,
"loss": 1.1173,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 0.0004031128874149274,
"loss": 1.0876,
"step": 13
},
{
"epoch": 0.03,
"learning_rate": 0.0003884492980336779,
"loss": 1.0524,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 0.0003752776749732568,
"loss": 0.8953,
"step": 15
},
{
"epoch": 0.04,
"learning_rate": 0.00036336104634371584,
"loss": 1.1335,
"step": 16
},
{
"epoch": 0.04,
"learning_rate": 0.00035251199395531623,
"loss": 0.9837,
"step": 17
},
{
"epoch": 0.04,
"learning_rate": 0.00034258007985157445,
"loss": 0.9707,
"step": 18
},
{
"epoch": 0.05,
"learning_rate": 0.0003334429644276751,
"loss": 0.9149,
"step": 19
},
{
"epoch": 0.05,
"learning_rate": 0.000325,
"loss": 1.0043,
"step": 20
},
{
"epoch": 0.05,
"learning_rate": 0.00031716752370827323,
"loss": 1.001,
"step": 21
},
{
"epoch": 0.05,
"learning_rate": 0.00030987534150481746,
"loss": 1.0395,
"step": 22
},
{
"epoch": 0.06,
"learning_rate": 0.000303064062678102,
"loss": 0.8718,
"step": 23
},
{
"epoch": 0.06,
"learning_rate": 0.00029668305198196496,
"loss": 1.1114,
"step": 24
},
{
"epoch": 0.06,
"learning_rate": 0.00029068883707497264,
"loss": 0.7765,
"step": 25
},
{
"epoch": 0.06,
"learning_rate": 0.0002850438562747845,
"loss": 0.9522,
"step": 26
},
{
"epoch": 0.06,
"learning_rate": 0.00027971546389275785,
"loss": 0.9588,
"step": 27
},
{
"epoch": 0.07,
"learning_rate": 0.00027467513278676785,
"loss": 1.0313,
"step": 28
},
{
"epoch": 0.07,
"learning_rate": 0.0002698978095246549,
"loss": 0.9338,
"step": 29
},
{
"epoch": 0.07,
"learning_rate": 0.000265361388801511,
"loss": 0.892,
"step": 30
},
{
"epoch": 0.07,
"learning_rate": 0.00026104628189331215,
"loss": 0.893,
"step": 31
},
{
"epoch": 0.08,
"learning_rate": 0.0002569350598886808,
"loss": 0.8983,
"step": 32
},
{
"epoch": 0.08,
"learning_rate": 0.00025301215685249496,
"loss": 0.9277,
"step": 33
},
{
"epoch": 0.08,
"learning_rate": 0.00024926362137539537,
"loss": 0.8962,
"step": 34
},
{
"epoch": 0.08,
"learning_rate": 0.00024567690745599767,
"loss": 0.9124,
"step": 35
},
{
"epoch": 0.09,
"learning_rate": 0.0002422406975624772,
"loss": 0.9535,
"step": 36
},
{
"epoch": 0.09,
"learning_rate": 0.00023894475218048754,
"loss": 0.9019,
"step": 37
},
{
"epoch": 0.09,
"learning_rate": 0.0002357797812857538,
"loss": 1.024,
"step": 38
},
{
"epoch": 0.09,
"learning_rate": 0.00023273733406281566,
"loss": 0.8549,
"step": 39
},
{
"epoch": 0.1,
"learning_rate": 0.0002298097038856279,
"loss": 1.0489,
"step": 40
}
],
"logging_steps": 1,
"max_steps": 417,
"num_train_epochs": 1,
"save_steps": 10,
"total_flos": 1.2924943770845184e+16,
"trial_name": null,
"trial_params": null
}