{
  "best_metric": 0.10066879540681839,
  "best_model_checkpoint": "autotrain-emecz-j2gix/checkpoint-164",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 164,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04878048780487805,
      "grad_norm": 0.3794308602809906,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.6976,
      "step": 4
    },
    {
      "epoch": 0.0975609756097561,
      "grad_norm": 0.671731173992157,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.6944,
      "step": 8
    },
    {
      "epoch": 0.14634146341463414,
      "grad_norm": 2.235081434249878,
      "learning_rate": 2.4e-05,
      "loss": 0.6922,
      "step": 12
    },
    {
      "epoch": 0.1951219512195122,
      "grad_norm": 2.346835136413574,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.6904,
      "step": 16
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 4.985357284545898,
      "learning_rate": 4e-05,
      "loss": 0.5521,
      "step": 20
    },
    {
      "epoch": 0.2926829268292683,
      "grad_norm": 4.204496383666992,
      "learning_rate": 4.8e-05,
      "loss": 0.3967,
      "step": 24
    },
    {
      "epoch": 0.34146341463414637,
      "grad_norm": 19.942995071411133,
      "learning_rate": 4.997726987107582e-05,
      "loss": 0.3278,
      "step": 28
    },
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 14.574546813964844,
      "learning_rate": 4.9876330414334614e-05,
      "loss": 0.3968,
      "step": 32
    },
    {
      "epoch": 0.43902439024390244,
      "grad_norm": 12.140644073486328,
      "learning_rate": 4.9694981991119004e-05,
      "loss": 0.4434,
      "step": 36
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 6.911764621734619,
      "learning_rate": 4.943381078271214e-05,
      "loss": 0.3158,
      "step": 40
    },
    {
      "epoch": 0.5365853658536586,
      "grad_norm": 9.625554084777832,
      "learning_rate": 4.9093660985448097e-05,
      "loss": 0.2399,
      "step": 44
    },
    {
      "epoch": 0.5853658536585366,
      "grad_norm": 1.0161175727844238,
      "learning_rate": 4.86756320819752e-05,
      "loss": 0.4131,
      "step": 48
    },
    {
      "epoch": 0.6341463414634146,
      "grad_norm": 12.2647123336792,
      "learning_rate": 4.818107528734504e-05,
      "loss": 0.1248,
      "step": 52
    },
    {
      "epoch": 0.6829268292682927,
      "grad_norm": 1.7963744401931763,
      "learning_rate": 4.761158918141474e-05,
      "loss": 0.197,
      "step": 56
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 5.169480323791504,
      "learning_rate": 4.696901454167988e-05,
      "loss": 0.2053,
      "step": 60
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 7.40244722366333,
      "learning_rate": 4.625542839324036e-05,
      "loss": 0.3954,
      "step": 64
    },
    {
      "epoch": 0.8292682926829268,
      "grad_norm": 4.580534934997559,
      "learning_rate": 4.547313729513163e-05,
      "loss": 0.2515,
      "step": 68
    },
    {
      "epoch": 0.8780487804878049,
      "grad_norm": 5.525244235992432,
      "learning_rate": 4.462466988472237e-05,
      "loss": 0.2298,
      "step": 72
    },
    {
      "epoch": 0.926829268292683,
      "grad_norm": 6.653177261352539,
      "learning_rate": 4.371276870427753e-05,
      "loss": 0.222,
      "step": 76
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 8.775516510009766,
      "learning_rate": 4.274038133610628e-05,
      "loss": 0.1531,
      "step": 80
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9493865030674846,
      "eval_auc": 0.9892279647889984,
      "eval_f1": 0.9470304975922953,
      "eval_loss": 0.14209958910942078,
      "eval_precision": 0.9305993690851735,
      "eval_recall": 0.9640522875816994,
      "eval_runtime": 5.3121,
      "eval_samples_per_second": 122.739,
      "eval_steps_per_second": 2.071,
      "step": 82
    },
    {
      "epoch": 1.024390243902439,
      "grad_norm": 0.35274970531463623,
      "learning_rate": 4.171065087494909e-05,
      "loss": 0.0691,
      "step": 84
    },
    {
      "epoch": 1.0731707317073171,
      "grad_norm": 0.19416379928588867,
      "learning_rate": 4.0626905768400516e-05,
      "loss": 0.1768,
      "step": 88
    },
    {
      "epoch": 1.1219512195121952,
      "grad_norm": 2.028865098953247,
      "learning_rate": 3.949264905820697e-05,
      "loss": 0.1823,
      "step": 92
    },
    {
      "epoch": 1.170731707317073,
      "grad_norm": 6.421178817749023,
      "learning_rate": 3.831154705721541e-05,
      "loss": 0.0778,
      "step": 96
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 6.153197288513184,
      "learning_rate": 3.7087417498572944e-05,
      "loss": 0.0399,
      "step": 100
    },
    {
      "epoch": 1.2682926829268293,
      "grad_norm": 4.175981044769287,
      "learning_rate": 3.6143458894413465e-05,
      "loss": 0.2834,
      "step": 104
    },
    {
      "epoch": 1.3170731707317074,
      "grad_norm": 0.08463025838136673,
      "learning_rate": 3.485362865576194e-05,
      "loss": 0.0894,
      "step": 108
    },
    {
      "epoch": 1.3658536585365852,
      "grad_norm": 17.901002883911133,
      "learning_rate": 3.353194805642477e-05,
      "loss": 0.0901,
      "step": 112
    },
    {
      "epoch": 1.4146341463414633,
      "grad_norm": 18.01471519470215,
      "learning_rate": 3.2182689228554517e-05,
      "loss": 0.2675,
      "step": 116
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 2.2452147006988525,
      "learning_rate": 3.081021344674632e-05,
      "loss": 0.0789,
      "step": 120
    },
    {
      "epoch": 1.5121951219512195,
      "grad_norm": 9.682868957519531,
      "learning_rate": 2.9418957030878874e-05,
      "loss": 0.1109,
      "step": 124
    },
    {
      "epoch": 1.5609756097560976,
      "grad_norm": 0.5994274616241455,
      "learning_rate": 2.8013417006383076e-05,
      "loss": 0.085,
      "step": 128
    },
    {
      "epoch": 1.6097560975609757,
      "grad_norm": 7.9580793380737305,
      "learning_rate": 2.6598136568289143e-05,
      "loss": 0.0938,
      "step": 132
    },
    {
      "epoch": 1.6585365853658538,
      "grad_norm": 0.2872644066810608,
      "learning_rate": 2.517769039603744e-05,
      "loss": 0.1155,
      "step": 136
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 6.185705184936523,
      "learning_rate": 2.3756669866520832e-05,
      "loss": 0.213,
      "step": 140
    },
    {
      "epoch": 1.7560975609756098,
      "grad_norm": 2.6776204109191895,
      "learning_rate": 2.2339668213154945e-05,
      "loss": 0.0741,
      "step": 144
    },
    {
      "epoch": 1.8048780487804879,
      "grad_norm": 6.972938060760498,
      "learning_rate": 2.0931265678947555e-05,
      "loss": 0.1297,
      "step": 148
    },
    {
      "epoch": 1.8536585365853657,
      "grad_norm": 2.5824131965637207,
      "learning_rate": 1.9536014711557528e-05,
      "loss": 0.0674,
      "step": 152
    },
    {
      "epoch": 1.9024390243902438,
      "grad_norm": 1.7245702743530273,
      "learning_rate": 1.815842524819793e-05,
      "loss": 0.0738,
      "step": 156
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 0.3297303020954132,
      "learning_rate": 1.680295013794778e-05,
      "loss": 0.0852,
      "step": 160
    },
    {
      "epoch": 2.0,
      "grad_norm": 5.111387252807617,
      "learning_rate": 1.547397074859249e-05,
      "loss": 0.057,
      "step": 164
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9708588957055214,
      "eval_auc": 0.9946588461974386,
      "eval_f1": 0.9691056910569106,
      "eval_loss": 0.10066879540681839,
      "eval_precision": 0.9644012944983819,
      "eval_recall": 0.9738562091503268,
      "eval_runtime": 5.3221,
      "eval_samples_per_second": 122.509,
      "eval_steps_per_second": 2.067,
      "step": 164
    }
  ],
  "logging_steps": 4,
  "max_steps": 246,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 798380148400128.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}