cls1.0 / checkpoint-164 /trainer_state.json
assaflehr's picture
Upload folder using huggingface_hub
ae5292d verified
{
"best_metric": 0.10066879540681839,
"best_model_checkpoint": "autotrain-emecz-j2gix/checkpoint-164",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 164,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04878048780487805,
"grad_norm": 0.3794308602809906,
"learning_rate": 8.000000000000001e-06,
"loss": 0.6976,
"step": 4
},
{
"epoch": 0.0975609756097561,
"grad_norm": 0.671731173992157,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.6944,
"step": 8
},
{
"epoch": 0.14634146341463414,
"grad_norm": 2.235081434249878,
"learning_rate": 2.4e-05,
"loss": 0.6922,
"step": 12
},
{
"epoch": 0.1951219512195122,
"grad_norm": 2.346835136413574,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.6904,
"step": 16
},
{
"epoch": 0.24390243902439024,
"grad_norm": 4.985357284545898,
"learning_rate": 4e-05,
"loss": 0.5521,
"step": 20
},
{
"epoch": 0.2926829268292683,
"grad_norm": 4.204496383666992,
"learning_rate": 4.8e-05,
"loss": 0.3967,
"step": 24
},
{
"epoch": 0.34146341463414637,
"grad_norm": 19.942995071411133,
"learning_rate": 4.997726987107582e-05,
"loss": 0.3278,
"step": 28
},
{
"epoch": 0.3902439024390244,
"grad_norm": 14.574546813964844,
"learning_rate": 4.9876330414334614e-05,
"loss": 0.3968,
"step": 32
},
{
"epoch": 0.43902439024390244,
"grad_norm": 12.140644073486328,
"learning_rate": 4.9694981991119004e-05,
"loss": 0.4434,
"step": 36
},
{
"epoch": 0.4878048780487805,
"grad_norm": 6.911764621734619,
"learning_rate": 4.943381078271214e-05,
"loss": 0.3158,
"step": 40
},
{
"epoch": 0.5365853658536586,
"grad_norm": 9.625554084777832,
"learning_rate": 4.9093660985448097e-05,
"loss": 0.2399,
"step": 44
},
{
"epoch": 0.5853658536585366,
"grad_norm": 1.0161175727844238,
"learning_rate": 4.86756320819752e-05,
"loss": 0.4131,
"step": 48
},
{
"epoch": 0.6341463414634146,
"grad_norm": 12.2647123336792,
"learning_rate": 4.818107528734504e-05,
"loss": 0.1248,
"step": 52
},
{
"epoch": 0.6829268292682927,
"grad_norm": 1.7963744401931763,
"learning_rate": 4.761158918141474e-05,
"loss": 0.197,
"step": 56
},
{
"epoch": 0.7317073170731707,
"grad_norm": 5.169480323791504,
"learning_rate": 4.696901454167988e-05,
"loss": 0.2053,
"step": 60
},
{
"epoch": 0.7804878048780488,
"grad_norm": 7.40244722366333,
"learning_rate": 4.625542839324036e-05,
"loss": 0.3954,
"step": 64
},
{
"epoch": 0.8292682926829268,
"grad_norm": 4.580534934997559,
"learning_rate": 4.547313729513163e-05,
"loss": 0.2515,
"step": 68
},
{
"epoch": 0.8780487804878049,
"grad_norm": 5.525244235992432,
"learning_rate": 4.462466988472237e-05,
"loss": 0.2298,
"step": 72
},
{
"epoch": 0.926829268292683,
"grad_norm": 6.653177261352539,
"learning_rate": 4.371276870427753e-05,
"loss": 0.222,
"step": 76
},
{
"epoch": 0.975609756097561,
"grad_norm": 8.775516510009766,
"learning_rate": 4.274038133610628e-05,
"loss": 0.1531,
"step": 80
},
{
"epoch": 1.0,
"eval_accuracy": 0.9493865030674846,
"eval_auc": 0.9892279647889984,
"eval_f1": 0.9470304975922953,
"eval_loss": 0.14209958910942078,
"eval_precision": 0.9305993690851735,
"eval_recall": 0.9640522875816994,
"eval_runtime": 5.3121,
"eval_samples_per_second": 122.739,
"eval_steps_per_second": 2.071,
"step": 82
},
{
"epoch": 1.024390243902439,
"grad_norm": 0.35274970531463623,
"learning_rate": 4.171065087494909e-05,
"loss": 0.0691,
"step": 84
},
{
"epoch": 1.0731707317073171,
"grad_norm": 0.19416379928588867,
"learning_rate": 4.0626905768400516e-05,
"loss": 0.1768,
"step": 88
},
{
"epoch": 1.1219512195121952,
"grad_norm": 2.028865098953247,
"learning_rate": 3.949264905820697e-05,
"loss": 0.1823,
"step": 92
},
{
"epoch": 1.170731707317073,
"grad_norm": 6.421178817749023,
"learning_rate": 3.831154705721541e-05,
"loss": 0.0778,
"step": 96
},
{
"epoch": 1.2195121951219512,
"grad_norm": 6.153197288513184,
"learning_rate": 3.7087417498572944e-05,
"loss": 0.0399,
"step": 100
},
{
"epoch": 1.2682926829268293,
"grad_norm": 4.175981044769287,
"learning_rate": 3.6143458894413465e-05,
"loss": 0.2834,
"step": 104
},
{
"epoch": 1.3170731707317074,
"grad_norm": 0.08463025838136673,
"learning_rate": 3.485362865576194e-05,
"loss": 0.0894,
"step": 108
},
{
"epoch": 1.3658536585365852,
"grad_norm": 17.901002883911133,
"learning_rate": 3.353194805642477e-05,
"loss": 0.0901,
"step": 112
},
{
"epoch": 1.4146341463414633,
"grad_norm": 18.01471519470215,
"learning_rate": 3.2182689228554517e-05,
"loss": 0.2675,
"step": 116
},
{
"epoch": 1.4634146341463414,
"grad_norm": 2.2452147006988525,
"learning_rate": 3.081021344674632e-05,
"loss": 0.0789,
"step": 120
},
{
"epoch": 1.5121951219512195,
"grad_norm": 9.682868957519531,
"learning_rate": 2.9418957030878874e-05,
"loss": 0.1109,
"step": 124
},
{
"epoch": 1.5609756097560976,
"grad_norm": 0.5994274616241455,
"learning_rate": 2.8013417006383076e-05,
"loss": 0.085,
"step": 128
},
{
"epoch": 1.6097560975609757,
"grad_norm": 7.9580793380737305,
"learning_rate": 2.6598136568289143e-05,
"loss": 0.0938,
"step": 132
},
{
"epoch": 1.6585365853658538,
"grad_norm": 0.2872644066810608,
"learning_rate": 2.517769039603744e-05,
"loss": 0.1155,
"step": 136
},
{
"epoch": 1.7073170731707317,
"grad_norm": 6.185705184936523,
"learning_rate": 2.3756669866520832e-05,
"loss": 0.213,
"step": 140
},
{
"epoch": 1.7560975609756098,
"grad_norm": 2.6776204109191895,
"learning_rate": 2.2339668213154945e-05,
"loss": 0.0741,
"step": 144
},
{
"epoch": 1.8048780487804879,
"grad_norm": 6.972938060760498,
"learning_rate": 2.0931265678947555e-05,
"loss": 0.1297,
"step": 148
},
{
"epoch": 1.8536585365853657,
"grad_norm": 2.5824131965637207,
"learning_rate": 1.9536014711557528e-05,
"loss": 0.0674,
"step": 152
},
{
"epoch": 1.9024390243902438,
"grad_norm": 1.7245702743530273,
"learning_rate": 1.815842524819793e-05,
"loss": 0.0738,
"step": 156
},
{
"epoch": 1.951219512195122,
"grad_norm": 0.3297303020954132,
"learning_rate": 1.680295013794778e-05,
"loss": 0.0852,
"step": 160
},
{
"epoch": 2.0,
"grad_norm": 5.111387252807617,
"learning_rate": 1.547397074859249e-05,
"loss": 0.057,
"step": 164
},
{
"epoch": 2.0,
"eval_accuracy": 0.9708588957055214,
"eval_auc": 0.9946588461974386,
"eval_f1": 0.9691056910569106,
"eval_loss": 0.10066879540681839,
"eval_precision": 0.9644012944983819,
"eval_recall": 0.9738562091503268,
"eval_runtime": 5.3221,
"eval_samples_per_second": 122.509,
"eval_steps_per_second": 2.067,
"step": 164
}
],
"logging_steps": 4,
"max_steps": 246,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 798380148400128.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}