sentiment-pt-pl30-0 / trainer_state.json
apwic's picture
End of training
196302f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 6.223746299743652,
"learning_rate": 4.75e-05,
"loss": 0.5424,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7418546365914787,
"eval_f1": 0.6575164379109477,
"eval_loss": 0.4762427806854248,
"eval_precision": 0.6837301587301587,
"eval_recall": 0.647344971813057,
"eval_runtime": 1.7978,
"eval_samples_per_second": 221.935,
"eval_steps_per_second": 27.811,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 4.059021949768066,
"learning_rate": 4.5e-05,
"loss": 0.4345,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7894736842105263,
"eval_f1": 0.7673663168415792,
"eval_loss": 0.41568055748939514,
"eval_precision": 0.7581367924528302,
"eval_recall": 0.7985542825968357,
"eval_runtime": 1.7989,
"eval_samples_per_second": 221.802,
"eval_steps_per_second": 27.795,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 3.354827404022217,
"learning_rate": 4.25e-05,
"loss": 0.3391,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8180088078011953,
"eval_loss": 0.33879804611206055,
"eval_precision": 0.8323930726843348,
"eval_recall": 0.8071467539552646,
"eval_runtime": 1.8074,
"eval_samples_per_second": 220.763,
"eval_steps_per_second": 27.665,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 2.747511863708496,
"learning_rate": 4e-05,
"loss": 0.2837,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8333016825553572,
"eval_loss": 0.32792460918426514,
"eval_precision": 0.8341507249908615,
"eval_recall": 0.8324695399163484,
"eval_runtime": 1.804,
"eval_samples_per_second": 221.178,
"eval_steps_per_second": 27.717,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 0.42053157091140747,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.2761,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8391129032258065,
"eval_loss": 0.31322285532951355,
"eval_precision": 0.8345705196182396,
"eval_recall": 0.8442444080741953,
"eval_runtime": 1.8022,
"eval_samples_per_second": 221.397,
"eval_steps_per_second": 27.744,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.2977254390716553,
"learning_rate": 3.5e-05,
"loss": 0.2459,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.87468671679198,
"eval_f1": 0.8543795620437956,
"eval_loss": 0.3032587468624115,
"eval_precision": 0.843984962406015,
"eval_recall": 0.868839789052555,
"eval_runtime": 1.8072,
"eval_samples_per_second": 220.78,
"eval_steps_per_second": 27.667,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 2.8183882236480713,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.2321,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8622085718274466,
"eval_loss": 0.2870577275753021,
"eval_precision": 0.8530168716042322,
"eval_recall": 0.8741589379887251,
"eval_runtime": 1.8055,
"eval_samples_per_second": 220.996,
"eval_steps_per_second": 27.694,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 3.4162003993988037,
"learning_rate": 3e-05,
"loss": 0.2206,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8822055137844611,
"eval_f1": 0.8560793854229822,
"eval_loss": 0.2634139955043793,
"eval_precision": 0.8609538327526132,
"eval_recall": 0.8516548463356974,
"eval_runtime": 1.8055,
"eval_samples_per_second": 220.985,
"eval_steps_per_second": 27.692,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 5.009228229522705,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.2067,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8703223612108386,
"eval_loss": 0.2633897066116333,
"eval_precision": 0.8694131129742446,
"eval_recall": 0.8712493180578287,
"eval_runtime": 1.8057,
"eval_samples_per_second": 220.963,
"eval_steps_per_second": 27.69,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 0.9459621906280518,
"learning_rate": 2.5e-05,
"loss": 0.192,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8631217838765008,
"eval_loss": 0.2696186900138855,
"eval_precision": 0.8872804935927859,
"eval_recall": 0.8462447717766868,
"eval_runtime": 1.8049,
"eval_samples_per_second": 221.061,
"eval_steps_per_second": 27.702,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 0.9607306122779846,
"learning_rate": 2.25e-05,
"loss": 0.1866,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8808495451466529,
"eval_loss": 0.2752375304698944,
"eval_precision": 0.8691495353421572,
"eval_recall": 0.8972995090016367,
"eval_runtime": 1.8092,
"eval_samples_per_second": 220.542,
"eval_steps_per_second": 27.637,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 4.809903621673584,
"learning_rate": 2e-05,
"loss": 0.1786,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8972431077694235,
"eval_f1": 0.8792560061999484,
"eval_loss": 0.2651856243610382,
"eval_precision": 0.8707622232472325,
"eval_recall": 0.889798145117294,
"eval_runtime": 1.8065,
"eval_samples_per_second": 220.87,
"eval_steps_per_second": 27.678,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 0.7058310508728027,
"learning_rate": 1.75e-05,
"loss": 0.1695,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.9072681704260651,
"eval_f1": 0.8867007927797945,
"eval_loss": 0.25362077355384827,
"eval_precision": 0.89198606271777,
"eval_recall": 0.8818876159301692,
"eval_runtime": 1.82,
"eval_samples_per_second": 219.234,
"eval_steps_per_second": 27.473,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 7.428104877471924,
"learning_rate": 1.5e-05,
"loss": 0.1664,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8872180451127819,
"eval_f1": 0.8680720368560659,
"eval_loss": 0.2736993730068207,
"eval_precision": 0.8587217615098657,
"eval_recall": 0.8802054919076197,
"eval_runtime": 1.8089,
"eval_samples_per_second": 220.576,
"eval_steps_per_second": 27.641,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 0.6353508234024048,
"learning_rate": 1.25e-05,
"loss": 0.1521,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.9022556390977443,
"eval_f1": 0.8829621606985718,
"eval_loss": 0.26195329427719116,
"eval_precision": 0.8802419354838709,
"eval_recall": 0.8858428805237315,
"eval_runtime": 1.8126,
"eval_samples_per_second": 220.122,
"eval_steps_per_second": 27.584,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 1.580483317375183,
"learning_rate": 1e-05,
"loss": 0.1494,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8760914310475572,
"eval_loss": 0.30298247933387756,
"eval_precision": 0.8629851740796268,
"eval_recall": 0.8962538643389707,
"eval_runtime": 1.8105,
"eval_samples_per_second": 220.383,
"eval_steps_per_second": 27.617,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 3.5637781620025635,
"learning_rate": 7.5e-06,
"loss": 0.1487,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8733660552828726,
"eval_loss": 0.2702126204967499,
"eval_precision": 0.8650109547970479,
"eval_recall": 0.8837515911983997,
"eval_runtime": 1.8055,
"eval_samples_per_second": 220.99,
"eval_steps_per_second": 27.693,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 3.6467041969299316,
"learning_rate": 5e-06,
"loss": 0.1494,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8947368421052632,
"eval_f1": 0.8765906680805938,
"eval_loss": 0.2763027548789978,
"eval_precision": 0.8675710594315245,
"eval_recall": 0.888025095471904,
"eval_runtime": 1.8075,
"eval_samples_per_second": 220.753,
"eval_steps_per_second": 27.663,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 4.27400541305542,
"learning_rate": 2.5e-06,
"loss": 0.1334,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8733660552828726,
"eval_loss": 0.28261518478393555,
"eval_precision": 0.8650109547970479,
"eval_recall": 0.8837515911983997,
"eval_runtime": 1.8039,
"eval_samples_per_second": 221.182,
"eval_steps_per_second": 27.717,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 0.5643185377120972,
"learning_rate": 0.0,
"loss": 0.1325,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8922305764411027,
"eval_f1": 0.8721887408091659,
"eval_loss": 0.27931535243988037,
"eval_precision": 0.866466275659824,
"eval_recall": 0.8787506819421713,
"eval_runtime": 1.804,
"eval_samples_per_second": 221.175,
"eval_steps_per_second": 27.716,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 8444128359504000.0,
"train_loss": 0.2269888150887411,
"train_runtime": 635.9179,
"train_samples_per_second": 114.417,
"train_steps_per_second": 3.837
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 8444128359504000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}