Phi-2_PT_3.0_rev4 / trainer_state.json
vitorandrade's picture
Upload 14 files
1f0093a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.41322988975092,
"eval_steps": 50,
"global_step": 1900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.33,
"learning_rate": 5.263157894736842e-06,
"loss": 10.6083,
"step": 50
},
{
"epoch": 0.33,
"eval_loss": 10.407150268554688,
"eval_runtime": 65.9544,
"eval_samples_per_second": 0.561,
"eval_steps_per_second": 0.561,
"step": 50
},
{
"epoch": 0.65,
"learning_rate": 1.0526315789473684e-05,
"loss": 10.0615,
"step": 100
},
{
"epoch": 0.65,
"eval_loss": 9.026963233947754,
"eval_runtime": 66.0596,
"eval_samples_per_second": 0.56,
"eval_steps_per_second": 0.56,
"step": 100
},
{
"epoch": 0.98,
"learning_rate": 1.578947368421053e-05,
"loss": 8.0748,
"step": 150
},
{
"epoch": 0.98,
"eval_loss": 6.740030288696289,
"eval_runtime": 66.4335,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 150
},
{
"epoch": 1.31,
"learning_rate": 1.999831241633323e-05,
"loss": 6.2551,
"step": 200
},
{
"epoch": 1.31,
"eval_loss": 5.468978404998779,
"eval_runtime": 66.4722,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 200
},
{
"epoch": 1.63,
"learning_rate": 1.9939306773179498e-05,
"loss": 5.3533,
"step": 250
},
{
"epoch": 1.63,
"eval_loss": 4.9109086990356445,
"eval_runtime": 66.1035,
"eval_samples_per_second": 0.56,
"eval_steps_per_second": 0.56,
"step": 250
},
{
"epoch": 1.96,
"learning_rate": 1.979649067087574e-05,
"loss": 4.9846,
"step": 300
},
{
"epoch": 1.96,
"eval_loss": 4.694249629974365,
"eval_runtime": 66.2471,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"step": 300
},
{
"epoch": 2.29,
"learning_rate": 1.9571068366759143e-05,
"loss": 4.8176,
"step": 350
},
{
"epoch": 2.29,
"eval_loss": 4.578726768493652,
"eval_runtime": 66.1659,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"step": 350
},
{
"epoch": 2.61,
"learning_rate": 1.9264940672148018e-05,
"loss": 4.7153,
"step": 400
},
{
"epoch": 2.61,
"eval_loss": 4.5052080154418945,
"eval_runtime": 66.4396,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 400
},
{
"epoch": 2.94,
"learning_rate": 1.888068892427538e-05,
"loss": 4.6504,
"step": 450
},
{
"epoch": 2.94,
"eval_loss": 4.450746536254883,
"eval_runtime": 66.6083,
"eval_samples_per_second": 0.555,
"eval_steps_per_second": 0.555,
"step": 450
},
{
"epoch": 3.27,
"learning_rate": 1.842155321987566e-05,
"loss": 4.5897,
"step": 500
},
{
"epoch": 3.27,
"eval_loss": 4.411241054534912,
"eval_runtime": 66.3951,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 500
},
{
"epoch": 3.59,
"learning_rate": 1.789140509396394e-05,
"loss": 4.5623,
"step": 550
},
{
"epoch": 3.59,
"eval_loss": 4.383650302886963,
"eval_runtime": 66.4603,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 550
},
{
"epoch": 3.92,
"learning_rate": 1.729471487418621e-05,
"loss": 4.5371,
"step": 600
},
{
"epoch": 3.92,
"eval_loss": 4.359971523284912,
"eval_runtime": 66.5261,
"eval_samples_per_second": 0.556,
"eval_steps_per_second": 0.556,
"step": 600
},
{
"epoch": 4.25,
"learning_rate": 1.6636513986016215e-05,
"loss": 4.5189,
"step": 650
},
{
"epoch": 4.25,
"eval_loss": 4.340854167938232,
"eval_runtime": 66.0697,
"eval_samples_per_second": 0.56,
"eval_steps_per_second": 0.56,
"step": 650
},
{
"epoch": 4.57,
"learning_rate": 1.5922352526649803e-05,
"loss": 4.4797,
"step": 700
},
{
"epoch": 4.57,
"eval_loss": 4.326164722442627,
"eval_runtime": 66.0876,
"eval_samples_per_second": 0.56,
"eval_steps_per_second": 0.56,
"step": 700
},
{
"epoch": 4.9,
"learning_rate": 1.5158252465343242e-05,
"loss": 4.4863,
"step": 750
},
{
"epoch": 4.9,
"eval_loss": 4.313417911529541,
"eval_runtime": 66.3814,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 750
},
{
"epoch": 5.23,
"learning_rate": 1.4350656864820733e-05,
"loss": 4.4571,
"step": 800
},
{
"epoch": 5.23,
"eval_loss": 4.301650047302246,
"eval_runtime": 66.0901,
"eval_samples_per_second": 0.56,
"eval_steps_per_second": 0.56,
"step": 800
},
{
"epoch": 5.55,
"learning_rate": 1.3506375551927546e-05,
"loss": 4.4562,
"step": 850
},
{
"epoch": 5.55,
"eval_loss": 4.292283058166504,
"eval_runtime": 66.2796,
"eval_samples_per_second": 0.558,
"eval_steps_per_second": 0.558,
"step": 850
},
{
"epoch": 5.88,
"learning_rate": 1.2632527695645993e-05,
"loss": 4.4527,
"step": 900
},
{
"epoch": 5.88,
"eval_loss": 4.286748886108398,
"eval_runtime": 66.2321,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"step": 900
},
{
"epoch": 6.21,
"learning_rate": 1.1736481776669307e-05,
"loss": 4.4292,
"step": 950
},
{
"epoch": 6.21,
"eval_loss": 4.277628421783447,
"eval_runtime": 66.6577,
"eval_samples_per_second": 0.555,
"eval_steps_per_second": 0.555,
"step": 950
},
{
"epoch": 6.53,
"learning_rate": 1.0825793454723325e-05,
"loss": 4.423,
"step": 1000
},
{
"epoch": 6.53,
"eval_loss": 4.27248477935791,
"eval_runtime": 66.2871,
"eval_samples_per_second": 0.558,
"eval_steps_per_second": 0.558,
"step": 1000
},
{
"epoch": 6.86,
"learning_rate": 9.908141857552737e-06,
"loss": 4.423,
"step": 1050
},
{
"epoch": 6.86,
"eval_loss": 4.266038417816162,
"eval_runtime": 66.5179,
"eval_samples_per_second": 0.556,
"eval_steps_per_second": 0.556,
"step": 1050
},
{
"epoch": 7.19,
"learning_rate": 8.991264828797319e-06,
"loss": 4.4166,
"step": 1100
},
{
"epoch": 7.19,
"eval_loss": 4.2608160972595215,
"eval_runtime": 66.1693,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"step": 1100
},
{
"epoch": 7.51,
"learning_rate": 8.082893680762619e-06,
"loss": 4.4156,
"step": 1150
},
{
"epoch": 7.51,
"eval_loss": 4.257768630981445,
"eval_runtime": 66.5456,
"eval_samples_per_second": 0.556,
"eval_steps_per_second": 0.556,
"step": 1150
},
{
"epoch": 7.84,
"learning_rate": 7.190688002264308e-06,
"loss": 4.3988,
"step": 1200
},
{
"epoch": 7.84,
"eval_loss": 4.253511428833008,
"eval_runtime": 66.2492,
"eval_samples_per_second": 0.558,
"eval_steps_per_second": 0.558,
"step": 1200
},
{
"epoch": 8.17,
"learning_rate": 6.322171071261071e-06,
"loss": 4.4021,
"step": 1250
},
{
"epoch": 8.17,
"eval_loss": 4.251075744628906,
"eval_runtime": 66.2478,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"step": 1250
},
{
"epoch": 8.49,
"learning_rate": 5.484666416891109e-06,
"loss": 4.4038,
"step": 1300
},
{
"epoch": 8.49,
"eval_loss": 4.247603893280029,
"eval_runtime": 66.0732,
"eval_samples_per_second": 0.56,
"eval_steps_per_second": 0.56,
"step": 1300
},
{
"epoch": 8.82,
"learning_rate": 4.685236065835443e-06,
"loss": 4.3881,
"step": 1350
},
{
"epoch": 8.82,
"eval_loss": 4.24613094329834,
"eval_runtime": 66.4204,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 1350
},
{
"epoch": 9.15,
"learning_rate": 3.930620993728434e-06,
"loss": 4.3943,
"step": 1400
},
{
"epoch": 9.15,
"eval_loss": 4.2452192306518555,
"eval_runtime": 66.4796,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 1400
},
{
"epoch": 9.47,
"learning_rate": 3.2271842837425917e-06,
"loss": 4.3822,
"step": 1450
},
{
"epoch": 9.47,
"eval_loss": 4.24231481552124,
"eval_runtime": 66.529,
"eval_samples_per_second": 0.556,
"eval_steps_per_second": 0.556,
"step": 1450
},
{
"epoch": 9.8,
"learning_rate": 2.580857471647186e-06,
"loss": 4.4064,
"step": 1500
},
{
"epoch": 9.8,
"eval_loss": 4.241450309753418,
"eval_runtime": 66.3417,
"eval_samples_per_second": 0.558,
"eval_steps_per_second": 0.558,
"step": 1500
},
{
"epoch": 10.13,
"learning_rate": 1.9970905297711606e-06,
"loss": 4.3793,
"step": 1550
},
{
"epoch": 10.13,
"eval_loss": 4.239691257476807,
"eval_runtime": 66.2256,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"step": 1550
},
{
"epoch": 10.45,
"learning_rate": 1.4808059116167306e-06,
"loss": 4.385,
"step": 1600
},
{
"epoch": 10.45,
"eval_loss": 4.239114761352539,
"eval_runtime": 66.4764,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 1600
},
{
"epoch": 10.78,
"learning_rate": 1.0363570446297999e-06,
"loss": 4.3919,
"step": 1650
},
{
"epoch": 10.78,
"eval_loss": 4.239101409912109,
"eval_runtime": 66.7261,
"eval_samples_per_second": 0.555,
"eval_steps_per_second": 0.555,
"step": 1650
},
{
"epoch": 11.11,
"learning_rate": 6.67491621125429e-07,
"loss": 4.3934,
"step": 1700
},
{
"epoch": 11.11,
"eval_loss": 4.238786220550537,
"eval_runtime": 66.2354,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"step": 1700
},
{
"epoch": 11.43,
"learning_rate": 3.773199969074959e-07,
"loss": 4.3825,
"step": 1750
},
{
"epoch": 11.43,
"eval_loss": 4.238570690155029,
"eval_runtime": 66.3209,
"eval_samples_per_second": 0.558,
"eval_steps_per_second": 0.558,
"step": 1750
},
{
"epoch": 11.76,
"learning_rate": 1.6828896405244988e-07,
"loss": 4.3835,
"step": 1800
},
{
"epoch": 11.76,
"eval_loss": 4.2383294105529785,
"eval_runtime": 66.2218,
"eval_samples_per_second": 0.559,
"eval_steps_per_second": 0.559,
"step": 1800
},
{
"epoch": 12.09,
"learning_rate": 4.216111901092501e-08,
"loss": 4.3851,
"step": 1850
},
{
"epoch": 12.09,
"eval_loss": 4.238271236419678,
"eval_runtime": 66.3755,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.557,
"step": 1850
},
{
"epoch": 12.41,
"learning_rate": 0.0,
"loss": 4.3848,
"step": 1900
},
{
"epoch": 12.41,
"eval_loss": 4.2382659912109375,
"eval_runtime": 66.5156,
"eval_samples_per_second": 0.556,
"eval_steps_per_second": 0.556,
"step": 1900
}
],
"logging_steps": 50,
"max_steps": 1900,
"num_input_tokens_seen": 0,
"num_train_epochs": 13,
"save_steps": 50,
"total_flos": 7.73200764370944e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}